Add Blitter support for CopyBufferRect

Related-To: NEO-4013
Change-Id: Id0d114a89b2b42a6385cca5afaa162e577c10ffb
Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
This commit is contained in:
Krzysztof Gibala
2020-04-02 11:47:30 +02:00
committed by sys_ocldev
parent 27f688ea83
commit 7b9767636b
7 changed files with 159 additions and 21 deletions

View File

@ -618,7 +618,7 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
bool commandAllowed = (CL_COMMAND_READ_BUFFER == cmdType) || (CL_COMMAND_WRITE_BUFFER == cmdType) ||
(CL_COMMAND_COPY_BUFFER == cmdType) || (CL_COMMAND_READ_BUFFER_RECT == cmdType) ||
(CL_COMMAND_WRITE_BUFFER_RECT == cmdType);
(CL_COMMAND_WRITE_BUFFER_RECT == cmdType) || (CL_COMMAND_COPY_BUFFER_RECT == cmdType);
return commandAllowed && blitAllowed;
}

View File

@ -25,7 +25,11 @@ struct ClBlitProperties {
return BlitProperties::constructPropertiesForCopyBuffer(builtinOpParams.dstMemObj->getGraphicsAllocation(),
builtinOpParams.srcMemObj->getGraphicsAllocation(),
dstOffset, srcOffset, builtinOpParams.size.x);
{dstOffset, builtinOpParams.dstOffset.y, builtinOpParams.dstOffset.z},
{srcOffset, builtinOpParams.srcOffset.y, builtinOpParams.srcOffset.z},
builtinOpParams.size,
builtinOpParams.srcRowPitch, builtinOpParams.srcSlicePitch,
builtinOpParams.dstRowPitch, builtinOpParams.dstSlicePitch);
}
GraphicsAllocation *gpuAllocation = nullptr;
@ -109,6 +113,8 @@ struct ClBlitProperties {
return BlitterConstants::BlitDirection::HostPtrToBuffer;
} else if (CL_COMMAND_READ_BUFFER == commandType || CL_COMMAND_READ_BUFFER_RECT == commandType) {
return BlitterConstants::BlitDirection::BufferToHostPtr;
} else if (CL_COMMAND_COPY_BUFFER_RECT == commandType) {
return BlitterConstants::BlitDirection::BufferToBuffer;
} else {
UNRECOVERABLE_IF(CL_COMMAND_COPY_BUFFER != commandType);
return BlitterConstants::BlitDirection::BufferToBuffer;

View File

@ -1164,7 +1164,7 @@ HWTEST_F(AubCommandStreamReceiverTests, WhenBlitBufferIsCalledThenCounterIsCorre
EXPECT_EQ(0u, aubCsr->blitBufferCalled);
MockGraphicsAllocation allocation(reinterpret_cast<void *>(0x1000), 0);
BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0);
BlitProperties blitProperties = BlitProperties::constructPropertiesForCopyBuffer(&allocation, &allocation, 0, 0, 0, 0, 0, 0, 0);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties);
aubCsr->blitBuffer(blitPropertiesContainer, true);

View File

@ -784,6 +784,80 @@ HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenP
}
}
// Builds buffer-to-buffer blit properties from the test parameters (offsets, region, row/slice
// pitches), dispatches them through blitBuffer and walks the parsed command stream, expecting
// numberOfBltsPerRow XY_COPY_BLT commands for every (row, slice) pair of the copy region:
// max-size blits first, then one blit of bltLeftover x 1 as the last command of each row.
HWTEST_P(BcsDetaliedTestsWithParams, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommandsForCopyBufferRect) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    static_cast<OsAgnosticMemoryManager *>(csr.getMemoryManager())->turnOnFakingBigAllocations();

    // Fetch the parameter struct once instead of re-reading the tuple for every field.
    const auto &params = std::get<0>(GetParam());

    const uint32_t bltLeftover = 17;
    const Vec3<size_t> bltSize = params.copySize;
    const size_t numberOfBltsPerRow = 3;
    const size_t totalNumberOfBlts = numberOfBltsPerRow * bltSize.y * bltSize.z;

    cl_int retVal = CL_SUCCESS;
    auto buffer1 = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast<size_t>(8 * BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight), nullptr, retVal));
    // The buffer is dereferenced right below; stop here with a test failure if creation failed.
    ASSERT_NE(nullptr, buffer1.get());

    // The same allocation serves as both destination and source of the copy.
    const Vec3<size_t> dstOffset = params.hostPtrOffset;
    const Vec3<size_t> srcOffset = params.copyOffset;

    // Pitches are forwarded into the slot matching their name: constructPropertiesForCopyBuffer
    // takes (srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch) in that order.
    const size_t srcRowPitch = params.srcRowPitch;
    const size_t srcSlicePitch = params.srcSlicePitch;
    const size_t dstRowPitch = params.dstRowPitch;
    const size_t dstSlicePitch = params.dstSlicePitch;

    auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(buffer1->getGraphicsAllocation(), //dstAllocation
                                                                           buffer1->getGraphicsAllocation(), //srcAllocation
                                                                           dstOffset,                        //dstOffset
                                                                           srcOffset,                        //srcOffset
                                                                           bltSize,                          //copySize
                                                                           srcRowPitch,                      //srcRowPitch
                                                                           srcSlicePitch,                    //srcSlicePitch
                                                                           dstRowPitch,                      //dstRowPitch
                                                                           dstSlicePitch                     //dstSlicePitch
    );
    blitBuffer(&csr, blitProperties, true);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr.commandStream);
    auto &cmdList = hwParser.cmdList;
    auto cmdIterator = cmdList.begin();

    // Byte offset inside the current row; restarted whenever a new row begins.
    uint64_t offset = 0;
    for (size_t i = 0; i < totalNumberOfBlts; i++) {
        // Fail the test instead of reading past the end of the parsed command list.
        ASSERT_NE(cmdList.end(), cmdIterator);
        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*(cmdIterator++));
        // bltCmd is dereferenced below, so a failed cast has to abort the test body.
        ASSERT_NE(nullptr, bltCmd);

        const size_t indexInRow = i % numberOfBltsPerRow;
        const bool isLeftoverBlt = (indexInRow == numberOfBltsPerRow - 1);

        uint32_t expectedWidth = static_cast<uint32_t>(BlitterConstants::maxBlitWidth);
        uint32_t expectedHeight = static_cast<uint32_t>(BlitterConstants::maxBlitHeight);
        if (isLeftoverBlt) {
            expectedWidth = bltLeftover;
            expectedHeight = 1;
        }
        if (indexInRow == 0) {
            offset = 0;
        }

        EXPECT_EQ(expectedWidth, bltCmd->getTransferWidth());
        EXPECT_EQ(expectedHeight, bltCmd->getTransferHeight());
        EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch());
        EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch());

        auto rowIndex = (i / numberOfBltsPerRow) % blitProperties.copySize.y;
        auto sliceIndex = i / (numberOfBltsPerRow * blitProperties.copySize.y);

        // Expected addresses come from the same helper, fed with the properties built above.
        auto dstAddr = NEO::BlitCommandsHelper<FamilyType>::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, rowIndex, sliceIndex);
        auto srcAddr = NEO::BlitCommandsHelper<FamilyType>::calculateBlitCommandSourceBaseAddress(blitProperties, offset, rowIndex, sliceIndex);

        EXPECT_EQ(dstAddr, bltCmd->getDestinationBaseAddress());
        EXPECT_EQ(srcAddr, bltCmd->getSourceBaseAddress());

        // Widen before multiplying so the running offset is computed in 64 bits.
        offset += static_cast<uint64_t>(expectedWidth) * expectedHeight;
    }
}
INSTANTIATE_TEST_CASE_P(BcsDetaliedTest,
BcsDetaliedTestsWithParams,
::testing::Combine(
@ -1154,7 +1228,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(buffer1->getGraphicsAllocation(),
buffer2->getGraphicsAllocation(), 0, 0, 1);
buffer2->getGraphicsAllocation(), 0, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1505,7 +1579,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(buffer1->getGraphicsAllocation(),
buffer2->getGraphicsAllocation(),
buffer1Offset, buffer2Offset, 1);
{buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);

View File

@ -794,6 +794,9 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
hwInfo->capabilityTable.blitterOperationsSupported = false;
@ -806,6 +809,9 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0);
hwInfo->capabilityTable.blitterOperationsSupported = true;
@ -818,6 +824,9 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(-1);
hwInfo->capabilityTable.blitterOperationsSupported = true;
@ -830,25 +839,32 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
EXPECT_EQ(5u, bcsCsr->blitBufferCalled);
EXPECT_EQ(6u, bcsCsr->blitBufferCalled);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
hwInfo->capabilityTable.blitterOperationsSupported = true;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(6u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(7u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(8u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(9u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(9u, bcsCsr->blitBufferCalled);
EXPECT_EQ(10u, bcsCsr->blitBufferCalled);
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(10u, bcsCsr->blitBufferCalled);
EXPECT_EQ(11u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
EXPECT_EQ(12u, bcsCsr->blitBufferCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispatchBlitWhenUnblocked) {
@ -874,26 +890,33 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispat
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(5u, bcsCsr->blitBufferCalled);
EXPECT_EQ(6u, bcsCsr->blitBufferCalled);
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(6u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(7u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(8u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(9u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(9u, bcsCsr->blitBufferCalled);
EXPECT_EQ(10u, bcsCsr->blitBufferCalled);
commandQueue->enqueueWriteBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(11u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, 0, nullptr, nullptr);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferCalledThenUseBcs) {
@ -917,6 +940,31 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferCalledThenUseBcs) {
EXPECT_EQ(bufferForBlt1->getGraphicsAllocation()->getGpuAddress(), copyBltCmd->getDestinationBaseAddress());
}
// Enqueues a CopyBufferRect between two buffers that are not allowed to use the CPU copy path,
// then parses the BCS command stream and checks that an XY_COPY_BLT was programmed whose
// source is the first buffer's GPU address and whose destination is the second buffer's.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferRectCalledThenUseBcs) {
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));

    auto bufferForBlt0 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    auto bufferForBlt1 = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    bufferForBlt0->forceDisallowCPUCopy = true;
    bufferForBlt1->forceDisallowCPUCopy = true;

    // Both origins address buffers here; bufferForBlt0 is the source, bufferForBlt1 the destination.
    size_t srcOrigin[] = {0, 0, 0};
    size_t dstOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    cmdQ->enqueueCopyBufferRect(bufferForBlt0.get(), bufferForBlt1.get(), srcOrigin, dstOrigin, region,
                                0, 0, 0, 0, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(commandQueue->getBcsCommandStreamReceiver()->getCS(0));

    auto commandItor = find<XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    // The iterator is dereferenced next; abort with a failure rather than reading end().
    ASSERT_NE(hwParser.cmdList.end(), commandItor);

    auto copyBltCmd = genCmdCast<XY_COPY_BLT *>(*commandItor);
    ASSERT_NE(nullptr, copyBltCmd);

    EXPECT_EQ(bufferForBlt0->getGraphicsAllocation()->getGpuAddress(), copyBltCmd->getSourceBaseAddress());
    EXPECT_EQ(bufferForBlt1->getGraphicsAllocation()->getGpuAddress(), copyBltCmd->getDestinationBaseAddress());
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedBlitEnqueueWhenUnblockingThenMakeResidentAllTimestampPackets) {
auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getBcsCommandStreamReceiver());
bcsCsr->storeMakeResidentAllocations = true;

View File

@ -75,7 +75,11 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons
}
BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
size_t dstOffset, size_t srcOffset, size_t copySize) {
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch) {
copySize.y = copySize.y ? copySize.y : 1;
copySize.z = copySize.z ? copySize.z : 1;
return {
nullptr, // outputTimestampPacket
@ -86,9 +90,13 @@ BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocati
srcAllocation, // srcAllocation
dstAllocation->getGpuAddress(), // dstGpuAddress
srcAllocation->getGpuAddress(), // srcGpuAddress
{copySize, 1, 1}, // copySize
{dstOffset, 0, 0}, // dstOffset
{srcOffset, 0, 0}}; // srcOffset
copySize, // copySize
dstOffset, // dstOffset
srcOffset, // srcOffset
dstRowPitch, // dstRowPitch
dstSlicePitch, // dstSlicePitch
srcRowPitch, // srcRowPitch
srcSlicePitch}; // srcSlicePitch
}
BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,

View File

@ -41,7 +41,9 @@ struct BlitProperties {
size_t gpuRowPitch, size_t gpuSlicePitch);
static BlitProperties constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation,
size_t dstOffset, size_t srcOffset, size_t copySize);
Vec3<size_t> dstOffset, Vec3<size_t> srcOffset, Vec3<size_t> copySize,
size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch);
static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection,
GraphicsAllocation *allocation);