Add Blitter support for ReadBufferRect

Change-Id: I530acc5a2b70fcd82f8ca5db46a0fa474d493a47
Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
Related-To: NEO-4013
This commit is contained in:
Krzysztof Gibala
2020-02-25 09:55:13 +01:00
committed by sys_ocldev
parent ec6d73e632
commit 0d342ac1aa
13 changed files with 440 additions and 120 deletions

View File

@ -617,7 +617,7 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
}
bool commandAllowed = (CL_COMMAND_READ_BUFFER == cmdType) || (CL_COMMAND_WRITE_BUFFER == cmdType) ||
(CL_COMMAND_COPY_BUFFER == cmdType);
(CL_COMMAND_COPY_BUFFER == cmdType) || (CL_COMMAND_READ_BUFFER_RECT == cmdType);
return commandAllowed && blitAllowed;
}

View File

@ -35,15 +35,16 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
const cl_event *eventWaitList,
cl_event *event) {
const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT;
auto isMemTransferNeeded = true;
if (buffer->isMemObjZeroCopy()) {
size_t bufferOffset;
size_t hostOffset;
computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch);
isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_READ_BUFFER_RECT);
isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, cmdType);
}
if (!isMemTransferNeeded) {
return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER_RECT, blockingRead,
return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead,
numEventsInWaitList, eventWaitList, event);
}
@ -65,7 +66,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
if (region[0] != 0 &&
region[1] != 0 &&
region[2] != 0) {
bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true);
auto &csr = blitEnqueueAllowed(cmdType) ? *getBcsCommandStreamReceiver() : getGpgpuCommandStreamReceiver();
bool status = csr.createAllocationForHostSurface(hostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
@ -80,6 +82,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBufferRect(
dc.dstPtr = alignedDstPtr;
dc.srcOffset = bufferOrigin;
dc.dstOffset = hostOrigin;
dc.transferAllocation = hostPtrSurf.getAllocation();
dc.dstOffset.x += dstPtrOffset;
dc.size = region;
dc.srcRowPitch = bufferRowPitch;

View File

@ -7,6 +7,7 @@
#pragma once
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/vec.h"
#include "opencl/source/context/context_type.h"
#include "opencl/source/context/driver_diagnostics.h"
@ -136,7 +137,7 @@ class Context : public BaseObject<_cl_context> {
ContextType peekContextType() { return this->contextType; }
MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const;
MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3<size_t> size) const;
SchedulerKernel &getSchedulerKernel();

View File

@ -16,7 +16,7 @@ cl_int Context::processExtraProperties(cl_context_properties propertyType, cl_co
return CL_INVALID_PROPERTY;
}
BlitOperationResult Context::blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const {
BlitOperationResult Context::blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3<size_t> size) const {
return BlitOperationResult::Unsupported;
}
} // namespace NEO

View File

@ -18,6 +18,7 @@ struct ClBlitProperties {
static BlitProperties constructProperties(BlitterConstants::BlitDirection blitDirection,
CommandStreamReceiver &commandStreamReceiver,
const BuiltinOpParams &builtinOpParams) {
if (BlitterConstants::BlitDirection::BufferToBuffer == blitDirection) {
auto dstOffset = builtinOpParams.dstOffset.x + builtinOpParams.dstMemObj->getOffset();
auto srcOffset = builtinOpParams.srcOffset.x + builtinOpParams.srcMemObj->getOffset();
@ -28,21 +29,27 @@ struct ClBlitProperties {
}
GraphicsAllocation *gpuAllocation = nullptr;
size_t copyOffset = 0;
Vec3<size_t> copyOffset = 0;
void *hostPtr = nullptr;
size_t hostPtrOffset = 0;
Vec3<size_t> hostPtrOffset = 0;
uint64_t memObjGpuVa = 0;
uint64_t hostAllocGpuVa = 0;
GraphicsAllocation *hostAllocation = builtinOpParams.transferAllocation;
Vec3<size_t> copySize = 0;
size_t hostRowPitch = 0;
size_t hostSlicePitch = 0;
size_t gpuRowPitch = 0;
size_t gpuSlicePitch = 0;
if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection) {
// write buffer
hostPtr = builtinOpParams.srcPtr;
hostPtrOffset = builtinOpParams.srcOffset.x;
copyOffset = builtinOpParams.dstOffset.x;
hostPtrOffset = builtinOpParams.srcOffset;
copyOffset = builtinOpParams.dstOffset;
memObjGpuVa = castToUint64(builtinOpParams.dstPtr);
hostAllocGpuVa = castToUint64(builtinOpParams.srcPtr);
@ -54,14 +61,15 @@ struct ClBlitProperties {
gpuAllocation = builtinOpParams.dstMemObj->getGraphicsAllocation();
memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.dstMemObj->getOffset());
}
copySize.x = builtinOpParams.size.x;
}
if (BlitterConstants::BlitDirection::BufferToHostPtr == blitDirection) {
// read buffer
hostPtr = builtinOpParams.dstPtr;
hostPtrOffset = builtinOpParams.dstOffset.x;
copyOffset = builtinOpParams.srcOffset.x;
hostPtrOffset = builtinOpParams.dstOffset;
copyOffset = builtinOpParams.srcOffset;
memObjGpuVa = castToUint64(builtinOpParams.srcPtr);
hostAllocGpuVa = castToUint64(builtinOpParams.dstPtr);
@ -73,6 +81,12 @@ struct ClBlitProperties {
gpuAllocation = builtinOpParams.srcMemObj->getGraphicsAllocation();
memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.srcMemObj->getOffset());
}
hostRowPitch = builtinOpParams.dstRowPitch;
hostSlicePitch = builtinOpParams.dstSlicePitch;
gpuRowPitch = builtinOpParams.srcRowPitch;
gpuSlicePitch = builtinOpParams.srcSlicePitch;
copySize = builtinOpParams.size;
}
UNRECOVERABLE_IF(BlitterConstants::BlitDirection::HostPtrToBuffer != blitDirection &&
@ -80,13 +94,15 @@ struct ClBlitProperties {
return BlitProperties::constructPropertiesForReadWriteBuffer(blitDirection, commandStreamReceiver, gpuAllocation,
hostAllocation, hostPtr, memObjGpuVa, hostAllocGpuVa,
hostPtrOffset, copyOffset, builtinOpParams.size.x);
hostPtrOffset, copyOffset, copySize,
hostRowPitch, hostSlicePitch,
gpuRowPitch, gpuSlicePitch);
}
static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType) {
if (CL_COMMAND_WRITE_BUFFER == commandType) {
return BlitterConstants::BlitDirection::HostPtrToBuffer;
} else if (CL_COMMAND_READ_BUFFER == commandType) {
} else if (CL_COMMAND_READ_BUFFER == commandType || CL_COMMAND_READ_BUFFER_RECT == commandType) {
return BlitterConstants::BlitDirection::BufferToHostPtr;
} else {
UNRECOVERABLE_IF(CL_COMMAND_COPY_BUFFER != commandType);

View File

@ -305,7 +305,7 @@ Buffer *Buffer::create(Context *context,
bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool());
if (gpuCopyRequired) {
auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, size);
auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, {size, 1, 1});
if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
auto cmdQ = context->getSpecialQueue();

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/helpers/vec.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/utilities/base_object_utils.h"
@ -32,12 +33,12 @@ struct BlitAuxTranslationTests : public ::testing::Test {
bcsCsr->initializeTagAllocation();
}
BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const override {
BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3<size_t> size) const override {
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, memory, nullptr,
hostPtr,
memory->getGpuAddress(), 0,
0, 0, size);
0, 0, size, 0, 0, 0, 0);
BlitPropertiesContainer container;
container.push_back(blitProperties);

View File

@ -536,10 +536,12 @@ HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZ
}
HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueReadBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) {
hwInfo->capabilityTable.blitterOperationsSupported = false;
initializeFixture<FamilyType>();
if (device->areSharedSystemAllocationsAllowed()) {
GTEST_SKIP();
}
auto cmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), &properties);
buffer->forceDisallowCPUCopy = true;

View File

@ -341,14 +341,31 @@ struct BcsTests : public CommandStreamReceiverHwTest {
// Verifies BlitCommandsHelper::estimateBlitCommandsSize() for a copy with y == 1 and z == 1:
// a width of exactly 3 * max2DBlitSize is expected to cost 3 XY_COPY_BLT commands, and a width
// one larger is expected to cost 4. No timestamp packet write is requested (last argument false).
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
// NOTE(review): the two sizes below are declared twice in this view (uint64_t, then size_t) —
// presumably the removed and added sides of the diff; confirm only one pair remains in the file.
uint64_t notAlignedBltSize = (3 * max2DBlitSize) + 1;
uint64_t alignedBltSize = (3 * max2DBlitSize);
size_t notAlignedBltSize = (3 * max2DBlitSize) + 1;
size_t alignedBltSize = (3 * max2DBlitSize);
uint32_t alignedNumberOfBlts = 3;
uint32_t notAlignedNumberOfBlts = 4;
// Expected size is simply the blit command size multiplied by the expected number of blits.
auto expectedAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts;
auto expectedNotAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts;
// The copy size is passed as a three-component value; only x carries the tested width.
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({alignedBltSize, 1, 1}, csrDependencies, false);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({notAlignedBltSize, 1, 1}, csrDependencies, false);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
}
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
Vec3<size_t> notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2};
Vec3<size_t> alignedBltSize = {(3 * max2DBlitSize), 4, 2};
size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z;
size_t notAlignedNumberOfBlts = 4 * notAlignedBltSize.y * notAlignedBltSize.z;
auto expectedAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts;
auto expectedNotAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts;
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize, csrDependencies, false);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies, false);
@ -370,7 +387,7 @@ HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorr
HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCalculateForAllAttachedProperites) {
const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
const uint32_t numberOfBlts = 3;
const uint64_t bltSize = (3 * max2DBlitSize);
const size_t bltSize = (3 * max2DBlitSize);
const uint32_t numberOfBlitOperations = 4;
auto baseSize = EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
@ -378,6 +395,33 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa
auto expectedAlignedSize = baseSize + MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo());
BlitPropertiesContainer blitPropertiesContainer;
for (uint32_t i = 0; i < numberOfBlitOperations; i++) {
BlitProperties blitProperties;
blitProperties.copySize = {bltSize, 1, 1};
blitPropertiesContainer.push_back(blitProperties);
expectedAlignedSize += expectedBlitInstructionsSize;
}
expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo());
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
}
HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForReadBufferRectThenCalculateForAllAttachedProperites) {
const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
const Vec3<size_t> bltSize = {(3 * max2DBlitSize), 4, 2};
const size_t numberOfBlts = 3 * bltSize.y * bltSize.z;
const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z;
auto baseSize = EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
auto expectedBlitInstructionsSize = sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts;
auto expectedAlignedSize = baseSize + MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo());
BlitPropertiesContainer blitPropertiesContainer;
for (uint32_t i = 0; i < numberOfBlitOperations; i++) {
BlitProperties blitProperties;
@ -400,8 +444,8 @@ HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommands
auto expectedSizeWithTimestampPacketWrite = expectedBaseSize + EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite();
auto expectedSizeWithoutTimestampPacketWrite = expectedBaseSize;
auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies, true);
auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies, false);
auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, true);
auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false);
EXPECT_EQ(expectedSizeWithTimestampPacketWrite, estimatedSizeWithTimestampPacketWrite);
EXPECT_EQ(expectedSizeWithoutTimestampPacketWrite, estimatedSizeWithoutTimestampPacketWrite);
@ -420,7 +464,7 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd
size_t expectedSize = (sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts) +
TimestampPacketHelper::getRequiredCmdStreamSize<FamilyType>(csrDependencies);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies, false);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false);
EXPECT_EQ(expectedSize, estimatedSize);
}
@ -434,7 +478,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
static_cast<OsAgnosticMemoryManager *>(csr.getMemoryManager())->turnOnFakingBigAllocations();
uint32_t bltLeftover = 17;
uint64_t bltSize = (2 * max2DBlitSize) + bltLeftover;
size_t bltSize = (2 * max2DBlitSize) + bltLeftover;
uint32_t numberOfBlts = 3;
cl_int retVal = CL_SUCCESS;
@ -447,7 +491,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer->getGraphicsAllocation(), nullptr, hostPtr,
buffer->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, bltSize);
0, 0, {bltSize, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
EXPECT_EQ(newTaskCount, csr.taskCount);
@ -517,6 +561,107 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
}
}
// Dispatches a BufferToHostPtr blit with copy size {2 * max2DBlitSize + 17, 2, 2} and walks the parsed
// command stream. For each of the y * z groups three XY_COPY_BLT commands are expected (two full-size
// blits followed by a leftover of width 17), then the optional synchronization workaround semaphores,
// the MI_FLUSH_DW that writes the task count to the tag allocation, MI_BATCH_BUFFER_END and MI_NOOP padding.
//
// Every pointer obtained from genCmdCast is checked with a fatal ASSERT_* before it is dereferenced, so an
// unexpected command makes the test fail cleanly instead of crashing the test binary.
HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommandsForReadBufferRect) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    static_cast<OsAgnosticMemoryManager *>(csr.getMemoryManager())->turnOnFakingBigAllocations();

    uint32_t bltLeftover = 17;
    Vec3<size_t> bltSize = {(2 * max2DBlitSize) + bltLeftover, 2, 2};
    // Three blits (two full + one leftover) for every row of every slice.
    size_t numberOfBlts = 3 * bltSize.y * bltSize.z;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast<size_t>(bltSize.x * bltSize.y * bltSize.z), nullptr, retVal));
    // The buffer is dereferenced right below; stop here if creation failed.
    ASSERT_NE(nullptr, buffer.get());
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    uint32_t newTaskCount = 19;
    csr.taskCount = newTaskCount - 1;
    EXPECT_EQ(0u, csr.recursiveLockCounter.load());
    auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr,
                                                                                csr, buffer->getGraphicsAllocation(), nullptr, hostPtr,
                                                                                buffer->getGraphicsAllocation()->getGpuAddress(), 0,
                                                                                0, 0, bltSize, 0, 0, 0, 0);
    blitBuffer(&csr, blitProperties, true);
    EXPECT_EQ(newTaskCount, csr.taskCount);
    EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount);
    EXPECT_EQ(newTaskCount, csr.latestSentTaskCount);
    EXPECT_EQ(newTaskCount, csr.latestSentTaskCountValueDuringFlush);
    EXPECT_EQ(1u, csr.recursiveLockCounter.load());

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr.commandStream);
    auto &cmdList = hwParser.cmdList;
    auto cmdIterator = cmdList.begin();

    uint64_t offset = 0;
    for (uint32_t i = 0; i < numberOfBlts; i++) {
        // Do not step past the end of the parsed list if fewer commands were programmed.
        ASSERT_NE(cmdList.end(), cmdIterator);
        auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*(cmdIterator++));
        ASSERT_NE(nullptr, bltCmd);

        uint32_t expectedWidth = static_cast<uint32_t>(BlitterConstants::maxBlitWidth);
        uint32_t expectedHeight = static_cast<uint32_t>(BlitterConstants::maxBlitHeight);
        // Every third command of a group carries the leftover.
        if (i % 3 == 2) {
            expectedWidth = bltLeftover;
            expectedHeight = 1;
        }
        EXPECT_EQ(expectedWidth, bltCmd->getTransferWidth());
        EXPECT_EQ(expectedHeight, bltCmd->getTransferHeight());
        EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch());
        EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch());

        // NOTE(review): row/slice are passed as i % y and i % z; all pitches are given as 0 above, so this
        // presumably does not influence the computed addresses — confirm against the helper implementation.
        auto dstAddr = NEO::BlitCommandsHelper<FamilyType>::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, i % bltSize.y, i % bltSize.z);
        auto srcAddr = NEO::BlitCommandsHelper<FamilyType>::calculateBlitCommandSourceBaseAddress(blitProperties, offset, i % bltSize.y, i % bltSize.z);

        EXPECT_EQ(dstAddr, bltCmd->getDestinationBaseAddress());
        EXPECT_EQ(srcAddr, bltCmd->getSourceBaseAddress());

        // The running offset restarts after the leftover blit that closes a group.
        offset = (i % 3 == 2) ? 0 : offset + (expectedWidth * expectedHeight);
    }

    if (UnitTestHelper<FamilyType>::isSynchronizationWArequired(pDevice->getHardwareInfo())) {
        ASSERT_NE(cmdList.end(), cmdIterator);
        auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
        ASSERT_NE(nullptr, miSemaphoreWaitCmd);
        EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
    }

    ASSERT_NE(cmdList.end(), cmdIterator);
    auto miFlushCmd = genCmdCast<MI_FLUSH_DW *>(*(cmdIterator++));

    if (UnitTestHelper<FamilyType>::additionalMiFlushDwRequired) {
        // An extra MI_FLUSH_DW without a post-sync write precedes the one that writes the task count.
        uint64_t gpuAddress = 0x0;
        uint64_t immData = 0;

        ASSERT_NE(nullptr, miFlushCmd);
        EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE, miFlushCmd->getPostSyncOperation());
        EXPECT_EQ(gpuAddress, miFlushCmd->getDestinationAddress());
        EXPECT_EQ(immData, miFlushCmd->getImmediateData());

        ASSERT_NE(cmdList.end(), cmdIterator);
        miFlushCmd = genCmdCast<MI_FLUSH_DW *>(*(cmdIterator++));
    }

    EXPECT_NE(cmdIterator, cmdList.end());
    // Guard the dereferences below on both paths (with and without the additional flush).
    ASSERT_NE(nullptr, miFlushCmd);
    EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation());
    EXPECT_EQ(csr.getTagAllocation()->getGpuAddress(), miFlushCmd->getDestinationAddress());
    EXPECT_EQ(newTaskCount, miFlushCmd->getImmediateData());

    if (UnitTestHelper<FamilyType>::isSynchronizationWArequired(pDevice->getHardwareInfo())) {
        ASSERT_NE(cmdList.end(), cmdIterator);
        auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
        ASSERT_NE(nullptr, miSemaphoreWaitCmd);
        EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
    }

    ASSERT_NE(cmdList.end(), cmdIterator);
    EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_BATCH_BUFFER_END *>(*(cmdIterator++)));

    // padding
    while (cmdIterator != cmdList.end()) {
        EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_NOOP *>(*(cmdIterator++)));
    }
}
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
@ -529,7 +674,7 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer->getGraphicsAllocation(), nullptr, hostPtr,
buffer->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
@ -581,11 +726,11 @@ HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommands
auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1,
buffer1->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2,
buffer2->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp2(*csr.getTimestampPacketAllocator(), 1);
@ -640,12 +785,12 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations
auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1,
buffer1->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2,
buffer2->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties1);
@ -683,12 +828,12 @@ HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAl
auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1,
buffer1->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2,
buffer2->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
BlitPropertiesContainer blitPropertiesContainer;
blitPropertiesContainer.push_back(blitProperties1);
@ -723,7 +868,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, buffer->getGraphicsAllocation(), nullptr, hostPtr,
buffer->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -774,7 +919,7 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
*myMockCsr, buffer->getGraphicsAllocation(), nullptr,
hostPtr,
buffer->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(myMockCsr.get(), blitProperties, false);
@ -805,7 +950,7 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) {
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
bcsCsr, buffer->getGraphicsAllocation(), nullptr, hostPtr,
buffer->getGraphicsAllocation()->getGpuAddress(), 0,
0, 0, 1);
0, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&bcsCsr, blitProperties, false);
@ -840,7 +985,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
nullptr, hostPtr,
subBuffer1->getGraphicsAllocation()->getGpuAddress() +
subBuffer1->getOffset(),
0, hostPtrOffset, 0, 1);
0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -862,7 +1007,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
nullptr, hostPtr,
subBuffer1->getGraphicsAllocation()->getGpuAddress() +
subBuffer1->getOffset(),
0, hostPtrOffset, 0, 1);
0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -875,6 +1020,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
}
EXPECT_EQ(subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress());
}
{
// Buffer to Buffer
HardwareParse hwParser;
@ -941,7 +1087,7 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
mapAllocation, mapPtr,
buffer->getGraphicsAllocation()->getGpuAddress(),
castToUint64(mapPtr),
hostPtrOffset, 0, 1);
{hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -963,7 +1109,29 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
csr, buffer->getGraphicsAllocation(),
mapAllocation, mapPtr,
buffer->getGraphicsAllocation()->getGpuAddress(),
castToUint64(mapPtr), hostPtrOffset, 0, 1);
castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*hwParser.cmdList.begin());
EXPECT_NE(nullptr, bltCmd);
if (pDevice->isFullRangeSvm()) {
EXPECT_EQ(reinterpret_cast<uint64_t>(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress());
}
EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddress(), bltCmd->getSourceBaseAddress());
}
{
// bufferRect to hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr,
csr, buffer->getGraphicsAllocation(),
mapAllocation, mapPtr,
buffer->getGraphicsAllocation()->getGpuAddress(),
castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -998,7 +1166,7 @@ HWTEST_F(BcsTests, givenMapAllocationInBuiltinOpParamsWhenConstructingThenUseItA
BuiltinOpParams builtinOpParams = {};
builtinOpParams.dstMemObj = buffer.get();
builtinOpParams.srcPtr = mapPtr;
builtinOpParams.size.x = 1;
builtinOpParams.size = {1, 1, 1};
builtinOpParams.transferAllocation = mapAllocation;
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer,
@ -1010,7 +1178,7 @@ HWTEST_F(BcsTests, givenMapAllocationInBuiltinOpParamsWhenConstructingThenUseItA
BuiltinOpParams builtinOpParams = {};
builtinOpParams.srcMemObj = buffer.get();
builtinOpParams.dstPtr = mapPtr;
builtinOpParams.size.x = 1;
builtinOpParams.size = {1, 1, 1};
builtinOpParams.transferAllocation = mapAllocation;
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr,
@ -1040,7 +1208,7 @@ HWTEST_F(BcsTests, givenNonZeroCopySvmAllocationWhenConstructingBlitPropertiesFo
builtinOpParams.dstSvmAlloc = svmData->gpuAllocation;
builtinOpParams.srcSvmAlloc = svmData->cpuAllocation;
builtinOpParams.srcPtr = reinterpret_cast<void *>(svmData->cpuAllocation->getGpuAddress());
builtinOpParams.size.x = 1;
builtinOpParams.size = {1, 1, 1};
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, builtinOpParams);
@ -1053,7 +1221,7 @@ HWTEST_F(BcsTests, givenNonZeroCopySvmAllocationWhenConstructingBlitPropertiesFo
builtinOpParams.srcSvmAlloc = svmData->gpuAllocation;
builtinOpParams.dstSvmAlloc = svmData->cpuAllocation;
builtinOpParams.dstPtr = reinterpret_cast<void *>(svmData->cpuAllocation->getGpuAddress());
builtinOpParams.size.x = 1;
builtinOpParams.size = {1, 1, 1};
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr,
csr, builtinOpParams);
@ -1087,7 +1255,7 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
builtinOpParams.srcSvmAlloc = svmData->gpuAllocation;
builtinOpParams.srcPtr = reinterpret_cast<void *>(svmData->cpuAllocation->getGpuAddress() + srcOffset);
builtinOpParams.dstPtr = reinterpret_cast<void *>(svmData->cpuAllocation->getGpuAddress() + dstOffset);
builtinOpParams.size.x = 1;
builtinOpParams.size = {1, 1, 1};
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, builtinOpParams);
@ -1111,7 +1279,7 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
builtinOpParams.dstSvmAlloc = svmData->cpuAllocation;
builtinOpParams.dstPtr = reinterpret_cast<void *>(svmData->cpuAllocation + dstOffset);
builtinOpParams.srcPtr = reinterpret_cast<void *>(svmData->gpuAllocation + srcOffset);
builtinOpParams.size.x = 1;
builtinOpParams.size = {1, 1, 1};
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr,
csr, builtinOpParams);
@ -1127,7 +1295,6 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress());
EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress());
}
svmAllocsManager.freeSVMAlloc(svmAlloc);
}
@ -1150,7 +1317,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
csr, buffer1->getGraphicsAllocation(),
nullptr, hostPtr,
buffer1->getGraphicsAllocation()->getGpuAddress(),
0, 0, buffer1Offset, 1);
0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1171,7 +1338,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
csr, buffer1->getGraphicsAllocation(), nullptr,
hostPtr,
buffer1->getGraphicsAllocation()->getGpuAddress(),
0, 0, buffer1Offset, 1);
0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1184,7 +1351,6 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
}
EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress());
}
for (auto buffer2Offset : addressOffsets) {
// Buffer to Buffer
HardwareParse hwParser;
@ -1240,6 +1406,12 @@ HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCor
}
}
// Passing a blit direction value that is cast from the raw number 7 to ClBlitProperties::constructProperties()
// together with default-constructed builtin-op parameters is expected to throw an exception.
HWTEST_F(BcsTests, givenInvalidBlitDirectionWhenConstructPropertiesThenExceptionIsThrow) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    const auto unsupportedDirection = static_cast<BlitterConstants::BlitDirection>(7);

    EXPECT_THROW(ClBlitProperties::constructProperties(unsupportedDirection, commandStreamReceiver, {}), std::exception);
}
struct MockScratchSpaceController : ScratchSpaceControllerBase {
using ScratchSpaceControllerBase::privateScratchAllocation;
using ScratchSpaceControllerBase::ScratchSpaceControllerBase;

View File

@ -10,6 +10,7 @@
#include "shared/source/gmm_helper/resource_info.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/vec.h"
#include "shared/source/memory_manager/allocations_list.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
@ -665,12 +666,12 @@ struct BcsBufferTests : public ::testing::Test {
bcsCsr->initializeTagAllocation();
}
BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const override {
BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3<size_t> size) const override {
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
*bcsCsr, memory, nullptr,
hostPtr,
memory->getGpuAddress(), 0,
0, 0, size);
0, 0, size, 0, 0, 0, 0);
BlitPropertiesContainer container;
container.push_back(blitProperties);
@ -768,40 +769,60 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
bufferForBlt1->forceDisallowCPUCopy = true;
auto *hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo();
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {1, 2, 1};
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0);
hwInfo->capabilityTable.blitterOperationsSupported = false;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
hwInfo->capabilityTable.blitterOperationsSupported = false;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0);
hwInfo->capabilityTable.blitterOperationsSupported = true;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(-1);
hwInfo->capabilityTable.blitterOperationsSupported = true;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(3u, bcsCsr->blitBufferCalled);
EXPECT_EQ(4u, bcsCsr->blitBufferCalled);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1);
hwInfo->capabilityTable.blitterOperationsSupported = true;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(4u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(5u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(6u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(7u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(8u, bcsCsr->blitBufferCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispatchBlitWhenUnblocked) {
@ -812,23 +833,34 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispat
bufferForBlt0->forceDisallowCPUCopy = true;
bufferForBlt1->forceDisallowCPUCopy = true;
UserEvent userEvent(bcsMockContext.get());
cl_event waitlist = &userEvent;
size_t bufferOrigin[] = {0, 0, 0};
size_t hostOrigin[] = {0, 0, 0};
size_t region[] = {1, 2, 1};
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt1.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
EXPECT_EQ(0u, bcsCsr->blitBufferCalled);
userEvent.setStatus(CL_COMPLETE);
EXPECT_EQ(3u, bcsCsr->blitBufferCalled);
EXPECT_EQ(4u, bcsCsr->blitBufferCalled);
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(4u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(5u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(6u, bcsCsr->blitBufferCalled);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
EXPECT_EQ(7u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region,
MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize,
MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferCalledThenUseBcs) {
@ -1345,6 +1377,53 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
// Checks the wait behaviour of enqueueReadBufferRect when the command is serviced by the BCS CSR:
// a non-blocking call must not wait and must leave the host-pointer allocation on the BCS CSR's
// temporary allocation list, while a blocking call must trigger exactly one
// waitForTaskCountAndCleanAllocationList on the BCS CSR.
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) {
    // Build a mock BCS CSR and install it as the context's BCS command stream receiver.
    auto mockBcsCsr = new MyMockCsr<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex());
    mockBcsCsr->taskCount = 1234;
    mockBcsCsr->initializeTagAllocation();
    mockBcsCsr->setupContext(*bcsMockContext->bcsOsContext);
    bcsMockContext->bcsCsr.reset(mockBcsCsr);

    // Point a fresh command queue at an engine control that wraps the mock BCS CSR.
    EngineControl mockBcsEngine = {mockBcsCsr, bcsMockContext->bcsOsContext.get()};
    auto queue = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    queue->bcsEngine = &mockBcsEngine;

    auto &gpgpuCsr = queue->getGpgpuCommandStreamReceiver();
    mockBcsCsr->gpgpuCsr = &gpgpuCsr;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;

    // Arbitrary host address used as the destination of the rect read.
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    // Rect description shared by both enqueues; every pitch is one cache line.
    const auto pitch = MemoryConstants::cacheLineSize;
    size_t bufferOrigin[] = {0, 0, 0};
    size_t hostOrigin[] = {0, 0, 0};
    size_t region[] = {1, 2, 1};

    // Non-blocking read: no wait expected, temporary allocations live on the BCS CSR only.
    queue->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region,
                                 pitch, pitch, pitch, pitch,
                                 hostPtr, 0, nullptr, nullptr);
    EXPECT_EQ(0u, mockBcsCsr->waitForTaskCountAndCleanAllocationListCalled);
    EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty());
    EXPECT_FALSE(mockBcsCsr->getTemporaryAllocations().peekIsEmpty());

    // Walk the BCS CSR's temporary allocation list looking for the one backed by hostPtr.
    bool hostPtrAllocationFound = false;
    for (auto allocation = mockBcsCsr->getTemporaryAllocations().peekHead(); allocation; allocation = allocation->next) {
        if (allocation->getUnderlyingBuffer() == hostPtr) {
            hostPtrAllocationFound = true;
            break;
        }
    }
    EXPECT_TRUE(hostPtrAllocationFound);

    // Blocking read: the BCS CSR must be waited on exactly once.
    queue->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region,
                                 pitch, pitch, pitch, pitch,
                                 hostPtr, 0, nullptr, nullptr);
    EXPECT_EQ(1u, mockBcsCsr->waitForTaskCountAndCleanAllocationListCalled);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) {
auto myMockCsr = new MyMockCsr<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex());
myMockCsr->taskCount = 1234;