Enqueue Read/Write operations with blitter

- Program dependencies from Event and IOQ
- Obtain new TimestampPacket
- Update output TimestampPacket if needed

Change-Id: I4ad020f5c5b05ceca8b096fafe1257523e2bc343
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3020
This commit is contained in:
Dunajski, Bartosz
2019-06-13 11:45:27 +02:00
committed by sys_ocldev
parent aae31c3c1b
commit 3e88907201
13 changed files with 170 additions and 34 deletions

View File

@ -587,12 +587,20 @@ bool CommandQueue::bufferCpuCopyAllowed(Buffer *buffer, cl_command_type commandT
// Executes a buffer read or write on the blit command stream receiver that the context
// returns for this buffer. The blit is issued with blocking == true and the function
// unconditionally returns CL_SUCCESS.
//
// commandType - CL_COMMAND_WRITE_BUFFER selects a copy from the host pointer into the
//               buffer; any other value selects a copy from the buffer to the host pointer.
// offset/size - byte offset inside the buffer and number of bytes, forwarded to the blit.
// ptr         - host pointer used as source or destination.
// numEventsInWaitList/eventWaitList/event - wrapped in an EventsRequest and used to fill
//               the CSR dependencies.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers, so a few
// statements appear twice (the earlier one presumably being the pre-change line) - confirm
// against the repository before treating the body as compilable.
cl_int CommandQueue::enqueueReadWriteBufferWithBlitTransfer(cl_command_type commandType, Buffer *buffer,
size_t offset, size_t size, void *ptr, cl_uint numEventsInWaitList,
const cl_event *eventWaitList, cl_event *event) {
// NOTE(review): presumably the pre-change position of the declaration repeated below.
CsrDependencies csrDependencies;
auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*buffer);
EventsRequest eventsRequest(numEventsInWaitList, eventWaitList, event);
// Receives the nodes that obtainNewTimestampPacketNodes() hands back below.
TimestampPacketContainer previousTimestampPacketNodes;
CsrDependencies csrDependencies;
// Dependencies coming from the event wait list, requested with DependenciesType::All.
csrDependencies.fillFromEventsRequestAndMakeResident(eventsRequest, *blitCommandStreamReceiver,
CsrDependencies::DependenciesType::All);
// Request one new timestamp packet node for this enqueue; the previously held nodes are
// then appended as one more dependency (presumably the in-order-queue dependency the
// commit message refers to as "IOQ" - TODO confirm).
obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, queueDependenciesClearRequired());
csrDependencies.push_back(&previousTimestampPacketNodes);
auto copyDirection = (CL_COMMAND_WRITE_BUFFER == commandType) ? BlitterConstants::BlitWithHostPtrDirection::FromHostPtr
: BlitterConstants::BlitWithHostPtrDirection::ToHostPtr;
// NOTE(review): the first call is presumably the pre-change line; the second one also
// passes the queue's timestampPacketContainer as the output timestamp packet.
blitCommandStreamReceiver->blitWithHostPtr(*buffer, ptr, true, offset, size, copyDirection, csrDependencies);
blitCommandStreamReceiver->blitWithHostPtr(*buffer, ptr, true, offset, size, copyDirection, csrDependencies, *timestampPacketContainer);
return CL_SUCCESS;
}

View File

@ -419,7 +419,8 @@ cl_int CommandStreamReceiver::expectMemory(const void *gfxAddress, const void *s
}
// Copies copySize bytes between `buffer` (at bufferOffset) and host memory at `hostPtr`
// by delegating to blitBuffer(). The host pointer is wrapped in a HostPtrSurface for which
// an allocation is created; failure to create it is treated as unrecoverable.
//
// copyDirection         - FromHostPtr: `buffer` is the destination; otherwise `buffer` is
//                         the source and the host-pointer buffer is the destination.
// csrDependencies       - forwarded unchanged to blitBuffer().
// outputTimestampPacket - forwarded unchanged to blitBuffer().
//
// NOTE(review): this listing looks like a diff rendered without +/- markers. Parameter
// lines and blitBuffer() calls appear twice (old form first, presumably), and the lines
// that create `hostPtrBuffer` are hidden behind the hunk header in the middle - confirm
// against the repository.
void CommandStreamReceiver::blitWithHostPtr(Buffer &buffer, void *hostPtr, bool blocking, size_t bufferOffset, uint64_t copySize,
BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies) {
BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies,
const TimestampPacketContainer &outputTimestampPacket) {
HostPtrSurface hostPtrSurface(hostPtr, static_cast<size_t>(copySize), true);
bool success = createAllocationForHostSurface(hostPtrSurface, false);
UNRECOVERABLE_IF(!success);
@ -431,9 +432,9 @@ void CommandStreamReceiver::blitWithHostPtr(Buffer &buffer, void *hostPtr, bool
true, false, true));
if (BlitterConstants::BlitWithHostPtrDirection::FromHostPtr == copyDirection) {
// Host -> buffer: destination offset is bufferOffset, source offset is 0.
blitBuffer(buffer, *hostPtrBuffer, blocking, bufferOffset, 0, copySize, csrDependencies);
blitBuffer(buffer, *hostPtrBuffer, blocking, bufferOffset, 0, copySize, csrDependencies, outputTimestampPacket);
} else {
// Buffer -> host: destination offset is 0, source offset is bufferOffset.
blitBuffer(*hostPtrBuffer, buffer, blocking, 0, bufferOffset, copySize, csrDependencies);
blitBuffer(*hostPtrBuffer, buffer, blocking, 0, bufferOffset, copySize, csrDependencies, outputTimestampPacket);
}
}
} // namespace NEO

View File

@ -177,9 +177,10 @@ class CommandStreamReceiver {
}
// Blit between `buffer` and host memory at `hostPtr` in the direction given by
// copyDirection. `outputTimestampPacket` is the additional parameter introduced by this
// change.
// NOTE(review): the two parameter-list tails below look like the old and new form of the
// same declaration shown without diff markers - confirm.
void blitWithHostPtr(Buffer &buffer, void *hostPtr, bool blocking, size_t bufferOffset, uint64_t copySize,
BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies);
BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies,
const TimestampPacketContainer &outputTimestampPacket);
// Pure virtual buffer-to-buffer blit of copySize bytes from srcBuffer (at srcOffset) to
// dstBuffer (at dstOffset); implemented by derived command stream receivers.
// NOTE(review): the two parameter-list tails below look like the old and new form of the
// same declaration shown without diff markers; the new form adds outputTimestampPacket.
virtual void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset,
uint64_t copySize, CsrDependencies &csrDependencies) = 0;
uint64_t copySize, CsrDependencies &csrDependencies, const TimestampPacketContainer &outputTimestampPacket) = 0;
ScratchSpaceController *getScratchSpaceController() const {
return scratchSpaceController.get();

View File

@ -71,7 +71,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
}
// Hardware-family override of CommandStreamReceiver::blitBuffer().
// NOTE(review): the two parameter-list tails below look like the old and new form of the
// same declaration shown without diff markers; the new form adds outputTimestampPacket.
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset,
uint64_t copySize, CsrDependencies &csrDependencies) override;
uint64_t copySize, CsrDependencies &csrDependencies, const TimestampPacketContainer &outputTimestampPacket) override;
protected:
using CommandStreamReceiver::osContext;

View File

@ -726,14 +726,15 @@ bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(cons
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset,
uint64_t copySize, CsrDependencies &csrDependencies) {
uint64_t copySize, CsrDependencies &csrDependencies, const TimestampPacketContainer &outputTimestampPacket) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
UNRECOVERABLE_IF(osContext->getEngineType() != aub_stream::EngineType::ENGINE_BCS);
auto lock = obtainUniqueOwnership();
auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(copySize, csrDependencies));
bool updateTimestampPacket = outputTimestampPacket.peekNodes().size() > 0;
auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(copySize, csrDependencies, updateTimestampPacket));
auto commandStreamStart = commandStream.getUsed();
auto newTaskCount = taskCount + 1;
latestSentTaskCount = newTaskCount;
@ -744,6 +745,12 @@ void CommandStreamReceiverHw<GfxFamily>::blitBuffer(Buffer &dstBuffer, Buffer &s
HardwareCommandsHelper<GfxFamily>::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), newTaskCount);
if (updateTimestampPacket) {
UNRECOVERABLE_IF(outputTimestampPacket.peekNodes().size() != 1);
auto timestampPacketGpuAddress = outputTimestampPacket.peekNodes().at(0)->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
HardwareCommandsHelper<GfxFamily>::programMiFlushDw(commandStream, timestampPacketGpuAddress, 0);
}
auto batchBufferEnd = reinterpret_cast<MI_BATCH_BUFFER_END *>(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END)));
*batchBufferEnd = GfxFamily::cmdInitBatchBufferEnd;

View File

@ -15,7 +15,7 @@ class LinearStream;
template <typename GfxFamily>
struct BlitCommandsHelper {
static size_t estimateBlitCommandsSize(uint64_t copySize, CsrDependencies &csrDependencies);
static size_t estimateBlitCommandsSize(uint64_t copySize, CsrDependencies &csrDependencies, bool updateTimestampPacket);
static void dispatchBlitCommandsForBuffer(Buffer &dstBuffer, Buffer &srcBuffer, LinearStream &linearStream,
uint64_t dstOffset, uint64_t srcOffset, uint64_t copySize);
static void appendBlitCommandsForBuffer(Buffer &dstBuffer, Buffer &srcBuffer, typename GfxFamily::XY_COPY_BLT &blitCmd);

View File

@ -10,7 +10,7 @@
namespace NEO {
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize, CsrDependencies &csrDependencies) {
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize, CsrDependencies &csrDependencies, bool updateTimestampPacket) {
size_t numberOfBlits = 0;
uint64_t sizeToBlit = copySize;
uint64_t width = 1;
@ -33,6 +33,7 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize
size_t size = TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
(sizeof(typename GfxFamily::XY_COPY_BLT) * numberOfBlits) +
sizeof(typename GfxFamily::MI_FLUSH_DW) +
(sizeof(typename GfxFamily::MI_FLUSH_DW) * static_cast<size_t>(updateTimestampPacket)) +
sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
return alignUp(size, MemoryConstants::cacheLineSize);

View File

@ -18,6 +18,7 @@
#include "runtime/helpers/hw_helper.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/helpers/string.h"
#include "runtime/helpers/timestamp_packet.h"
#include "runtime/helpers/validators.h"
#include "runtime/mem_obj/mem_obj_helper.h"
#include "runtime/memory_manager/host_ptr_manager.h"
@ -286,8 +287,9 @@ Buffer *Buffer::create(Context *context,
auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*pBuffer);
if (blitCommandStreamReceiver) {
CsrDependencies dependencies;
TimestampPacketContainer timestampPacketContainer;
blitCommandStreamReceiver->blitWithHostPtr(*pBuffer, hostPtr, true, 0, size, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
dependencies);
dependencies, timestampPacketContainer);
} else {
auto cmdQ = context->getSpecialQueue();
if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr)) {

View File

@ -285,6 +285,7 @@ struct BcsTests : public CommandStreamReceiverHwTest {
CommandStreamReceiverHwTest::TearDown();
}
TimestampPacketContainer timestampPacketContainer;
CsrDependencies csrDependencies;
std::unique_ptr<MockContext> context;
};
@ -301,13 +302,27 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman
auto expectedAlignedSize = alignUp(expectedSize + (sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts), MemoryConstants::cacheLineSize);
auto expectedNotAlignedSize = alignUp(expectedSize + (sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts), MemoryConstants::cacheLineSize);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize, csrDependencies);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize, csrDependencies, false);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies, false);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
}
// Requesting a timestamp packet update must grow the estimated blit command size by
// exactly one MI_FLUSH_DW (before cache-line alignment); without the request the estimate
// covers a single XY_COPY_BLT, one MI_FLUSH_DW and the MI_BATCH_BUFFER_END.
HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommandsThenAddMiFlushDw) {
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    // Unaligned sizes of both variants.
    size_t sizeWithoutWrite = sizeof(XY_COPY_BLT) + sizeof(MI_FLUSH_DW) + sizeof(MI_BATCH_BUFFER_END);
    size_t sizeWithWrite = sizeWithoutWrite + sizeof(MI_FLUSH_DW);

    auto alignedSizeWithWrite = alignUp(sizeWithWrite, MemoryConstants::cacheLineSize);
    auto alignedSizeWithoutWrite = alignUp(sizeWithoutWrite, MemoryConstants::cacheLineSize);

    auto estimateWithWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies, true);
    auto estimateWithoutWrite = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies, false);

    EXPECT_EQ(alignedSizeWithWrite, estimateWithWrite);
    EXPECT_EQ(alignedSizeWithoutWrite, estimateWithoutWrite);
}
HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
uint32_t numberOfBlts = 1;
size_t numberNodesPerContainer = 5;
@ -324,7 +339,7 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd
auto expectedAlignedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies, false);
EXPECT_EQ(expectedAlignedSize, estimatedSize);
}
@ -347,7 +362,8 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
uint32_t newTaskCount = 19;
csr.taskCount = newTaskCount - 1;
EXPECT_EQ(0u, csr.recursiveLockCounter.load());
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, bltSize, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, bltSize, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_EQ(newTaskCount, csr.taskCount);
EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount);
EXPECT_EQ(newTaskCount, csr.latestSentTaskCount);
@ -408,7 +424,8 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph
csrDependencies.push_back(&timestamp0);
csrDependencies.push_back(&timestamp1);
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(csr.commandStream);
@ -448,7 +465,8 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations
EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled);
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_TRUE(csr.isMadeResident(buffer->getGraphicsAllocation()));
EXPECT_TRUE(csr.isMadeResident(csr.commandStream.getGraphicsAllocation()));
@ -472,7 +490,8 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
uint32_t newTaskCount = 17;
csr.taskCount = newTaskCount - 1;
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_EQ(commandStream.getGraphicsAllocation(), csr.latestFlushedBatchBuffer.commandBufferAllocation);
EXPECT_EQ(commandStreamOffset, csr.latestFlushedBatchBuffer.startOffset);
@ -517,10 +536,12 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
void *hostPtr = reinterpret_cast<void *>(0x12340000);
myMockCsr->blitWithHostPtr(*buffer, hostPtr, false, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
myMockCsr->blitWithHostPtr(*buffer, hostPtr, false, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
myMockCsr->blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
myMockCsr->blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed);
@ -541,10 +562,12 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) {
EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled);
bcsCsr.blitWithHostPtr(*buffer, hostPtr, false, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
bcsCsr.blitWithHostPtr(*buffer, hostPtr, false, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled);
bcsCsr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
bcsCsr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
EXPECT_EQ(1u, mockInternalAllocationsStorage->cleanAllocationsCalled);
EXPECT_EQ(bcsCsr.taskCount, mockInternalAllocationsStorage->lastCleanAllocationsTaskCount);
EXPECT_TRUE(TEMPORARY_ALLOCATION == mockInternalAllocationsStorage->lastCleanAllocationUsage);
@ -561,7 +584,8 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
{
// from hostPtr
HardwareParse hwParser;
csr.blitWithHostPtr(*buffer1, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer1, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
hwParser.parseCommands<FamilyType>(csr.commandStream);
@ -574,7 +598,8 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
// to hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitWithHostPtr(*buffer1, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer1, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr,
csrDependencies, timestampPacketContainer);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
@ -587,7 +612,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
// Buffer to Buffer
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitBuffer(*buffer1, *buffer2, true, 0, 0, 1, csrDependencies);
csr.blitBuffer(*buffer1, *buffer2, true, 0, 0, 1, csrDependencies, timestampPacketContainer);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
@ -613,7 +638,8 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
// from hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitWithHostPtr(*buffer1, hostPtr, true, buffer1Offset, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer1, hostPtr, true, buffer1Offset, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
csrDependencies, timestampPacketContainer);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
@ -626,7 +652,8 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
// to hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitWithHostPtr(*buffer1, hostPtr, true, buffer1Offset, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr, csrDependencies);
csr.blitWithHostPtr(*buffer1, hostPtr, true, buffer1Offset, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr,
csrDependencies, timestampPacketContainer);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
@ -640,7 +667,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
// Buffer to Buffer
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitBuffer(*buffer1, *buffer2, true, buffer1Offset, buffer2Offset, 1, csrDependencies);
csr.blitBuffer(*buffer1, *buffer2, true, buffer1Offset, buffer2Offset, 1, csrDependencies, timestampPacketContainer);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);

View File

@ -457,7 +457,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
// Mock override with an empty body: all arguments are ignored and nothing is waited for.
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}
// Mock override with an empty body: no blit is performed.
// NOTE(review): the two parameter-list tails below look like the old and new form of the
// same override shown without diff markers; the new form adds outputTimestampPacket.
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset,
uint64_t copySize, CsrDependencies &csrDependencies) override{};
uint64_t copySize, CsrDependencies &csrDependencies, const TimestampPacketContainer &outputTimestampPacket) override{};
CompletionStamp flushTask(
LinearStream &commandStream,

View File

@ -167,9 +167,9 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
}
// Test override: increments blitBufferCalled and then forwards every argument to the
// CommandStreamReceiverHw<GfxFamily> implementation.
// NOTE(review): the signature tail and the forwarding call each appear twice; these look
// like the old and new lines of a diff shown without +/- markers (the new form adds
// outputTimestampPacket) - confirm against the repository.
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset,
uint64_t copySize, CsrDependencies &csrDependencies) override {
uint64_t copySize, CsrDependencies &csrDependencies, const TimestampPacketContainer &outputTimestampPacket) override {
// Counter inspected by tests to verify how many blits were requested.
blitBufferCalled++;
CommandStreamReceiverHw<GfxFamily>::blitBuffer(dstBuffer, srcBuffer, blocking, dstOffset, srcOffset, copySize, csrDependencies);
CommandStreamReceiverHw<GfxFamily>::blitBuffer(dstBuffer, srcBuffer, blocking, dstOffset, srcOffset, copySize, csrDependencies, outputTimestampPacket);
}
std::atomic<uint32_t> recursiveLockCounter;

View File

@ -21,6 +21,7 @@
#include "unit_tests/fixtures/memory_management_fixture.h"
#include "unit_tests/gen_common/matchers.h"
#include "unit_tests/helpers/debug_manager_state_restore.h"
#include "unit_tests/helpers/hw_parse.h"
#include "unit_tests/helpers/memory_management.h"
#include "unit_tests/mocks/mock_buffer.h"
#include "unit_tests/mocks/mock_command_queue.h"
@ -28,6 +29,7 @@
#include "unit_tests/mocks/mock_execution_environment.h"
#include "unit_tests/mocks/mock_gmm_resource_info.h"
#include "unit_tests/mocks/mock_memory_manager.h"
#include "unit_tests/mocks/mock_timestamp_container.h"
#include "unit_tests/utilities/base_object_utils.h"
#include "gmock/gmock.h"
@ -660,7 +662,10 @@ struct BcsBufferTests : public ::testing::Test {
if (is32bit) {
GTEST_SKIP();
}
DebugManager.flags.EnableTimestampPacket.set(1);
DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(true);
device.reset(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
device->getExecutionEnvironment()->getMutableHardwareInfo()->capabilityTable.blitterOperationsSupported = true;
bcsMockContext = std::make_unique<BcsMockContext>(device.get());
}
@ -716,6 +721,90 @@ HWTEST_F(BcsBufferTests, givenBcsSupportedWhenEnqueueReadWriteBufferIsCalledThen
EXPECT_EQ(2u, bcsCsr->blitBufferCalled);
}
// Enqueues a blocking write through the blitter with a wait list of two events whose
// timestamp packets come from two different command stream receivers, then parses the BCS
// command stream and checks that:
//  - exactly two MI_SEMAPHORE_WAIT commands are programmed, in wait-list order, each
//    pointing at the contextEnd field of the matching timestamp packet, and
//  - both semaphores precede the XY_COPY_BLT command.
HWTEST_F(BcsBufferTests, givenInputDependenciesWhenEnqueueBlitCalledThenProgramSemaphoresBeforeBlitCommand) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto csr0 = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsMockContext->bcsCsr.get());
    auto &csr1Engine = device->getEngine(aub_stream::EngineType::ENGINE_RCS, true);
    auto csr1 = csr1Engine.commandStreamReceiver;

    auto cmdQ0 = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    auto cmdQ1 = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    cmdQ1->engine = &csr1Engine;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // Keep the enqueue off the CPU-copy path so the blit path is exercised.
    buffer->forceDisallowCPUCopy = true;
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    const cl_uint numEvents = 2;
    MockTimestampPacketContainer timestamps[numEvents] = {{*csr0->getTimestampPacketAllocator(), 1}, {*csr1->getTimestampPacketAllocator(), 1}};

    Event event0(cmdQ0.get(), 0, 0, 0);
    event0.addTimestampPacketNodes(timestamps[0]);
    Event event1(cmdQ1.get(), 0, 0, 0);
    event1.addTimestampPacketNodes(timestamps[1]);
    cl_event waitlist[numEvents] = {&event0, &event1}; // dependencies from different CSRs

    cmdQ0->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, numEvents, waitlist, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(csr0->commandStream);

    uint32_t semaphoresCount = 0;
    bool blitCmdFound = false;
    for (auto &cmd : hwParser.cmdList) {
        if (auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(cmd)) {
            EXPECT_FALSE(blitCmdFound);
            // Stop with a regular test failure instead of indexing past the end of
            // `timestamps` when more semaphores than wait-list events are programmed.
            ASSERT_LT(semaphoresCount, numEvents);
            auto dataAddress = timestamps[semaphoresCount].peekNodes().at(0)->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
            EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
            semaphoresCount++;
        } else if (genCmdCast<typename FamilyType::XY_COPY_BLT *>(cmd)) {
            blitCmdFound = true;
            // Every dependency must already be programmed when the blit is reached.
            EXPECT_EQ(2u, semaphoresCount);
        }
    }
    EXPECT_EQ(2u, semaphoresCount);
    EXPECT_TRUE(blitCmdFound);
}
// Enqueues a blocking write through the blitter and inspects the BCS command stream:
// two MI_FLUSH_DW commands must follow the XY_COPY_BLT, and only the second one may
// target the contextEnd field of the queue's output timestamp packet with immediate
// data equal to 0.
HWTEST_F(BcsBufferTests, givenOutputTimestampPacketWhenBlitCalledThenProgramMiFlushDwWithDataWrite) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;

    auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsMockContext->bcsCsr.get());
    auto queue = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    buffer->forceDisallowCPUCopy = true;
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    queue->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);

    // Address the blit is expected to write once the copy has finished.
    auto outputNode = queue->timestampPacketContainer->peekNodes().at(0);
    auto expectedWriteAddress = outputNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(bcsCsr->commandStream);

    uint32_t flushCount = 0;
    bool blitSeen = false;
    for (auto &cmd : hwParser.cmdList) {
        auto miFlushDwCmd = genCmdCast<MI_FLUSH_DW *>(cmd);
        if (miFlushDwCmd == nullptr) {
            if (genCmdCast<XY_COPY_BLT *>(cmd)) {
                blitSeen = true;
                // No flush may be programmed ahead of the copy itself.
                EXPECT_EQ(0u, flushCount);
            }
            continue;
        }

        // Only the second MI_FLUSH_DW is the timestamp packet write.
        const bool isTimestampPacketWrite = (flushCount == 1);
        EXPECT_TRUE(blitSeen);
        EXPECT_EQ(isTimestampPacketWrite, expectedWriteAddress == miFlushDwCmd->getDestinationAddress());
        EXPECT_EQ(isTimestampPacketWrite, 0u == miFlushDwCmd->getImmediateData());
        flushCount++;
    }

    EXPECT_EQ(2u, flushCount);
    EXPECT_TRUE(blitSeen);
}
TEST_F(RenderCompressedBuffersCopyHostMemoryTests, givenNonRenderCompressedBufferWhenCopyFromHostPtrIsRequiredThenDontCallWriteBuffer) {
hwInfo->capabilityTable.ftrRenderCompressedBuffers = false;

View File

@ -255,7 +255,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
}
// Mock override with an empty body: no blit is performed.
// NOTE(review): the two parameter-list tails below look like the old and new form of the
// same override shown without diff markers; the new form adds outputTimestampPacket.
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset,
uint64_t copySize, CsrDependencies &csrDependencies) override{};
uint64_t copySize, CsrDependencies &csrDependencies, const TimestampPacketContainer &outputTimestampPacket) override{};
void setOSInterface(OSInterface *osInterface);