Program MI_SEMAPHORE_WAIT for dependencies during blit operations

Change-Id: I8b0e467886bfb23d026a0c13be514343a22a20a1
Related-To: NEO-3020
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2019-05-20 12:00:02 +02:00 committed by sys_ocldev
parent e6c7c862ed
commit b82cdd6b8e
11 changed files with 100 additions and 27 deletions

View File

@ -408,7 +408,7 @@ cl_int CommandStreamReceiver::expectMemory(const void *gfxAddress, const void *s
}
void CommandStreamReceiver::blitWithHostPtr(Buffer &buffer, void *hostPtr, uint64_t hostPtrSize,
BlitterConstants::BlitWithHostPtrDirection copyDirection) {
BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies) {
HostPtrSurface hostPtrSurface(hostPtr, static_cast<size_t>(hostPtrSize), true);
bool success = createAllocationForHostSurface(hostPtrSurface, false);
UNRECOVERABLE_IF(!success);
@ -420,9 +420,9 @@ void CommandStreamReceiver::blitWithHostPtr(Buffer &buffer, void *hostPtr, uint6
true, false, true));
if (BlitterConstants::BlitWithHostPtrDirection::FromHostPtr == copyDirection) {
blitBuffer(buffer, *hostPtrBuffer, hostPtrSize);
blitBuffer(buffer, *hostPtrBuffer, hostPtrSize, csrDependencies);
} else {
blitBuffer(*hostPtrBuffer, buffer, hostPtrSize);
blitBuffer(*hostPtrBuffer, buffer, hostPtrSize, csrDependencies);
}
}
} // namespace NEO

View File

@ -174,8 +174,9 @@ class CommandStreamReceiver {
this->latestSentTaskCount = latestSentTaskCount;
}
void blitWithHostPtr(Buffer &buffer, void *hostPtr, uint64_t hostPtrSize, BlitterConstants::BlitWithHostPtrDirection copyDirection);
virtual void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize) = 0;
void blitWithHostPtr(Buffer &buffer, void *hostPtr, uint64_t hostPtrSize,
BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies);
virtual void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize, CsrDependencies &csrDependencies) = 0;
protected:
void cleanupResources();

View File

@ -70,7 +70,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
return CommandStreamReceiverType::CSR_HW;
}
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize) override;
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize, CsrDependencies &csrDependencies) override;
protected:
using CommandStreamReceiver::osContext;

View File

@ -725,18 +725,20 @@ bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(cons
}
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize) {
void CommandStreamReceiverHw<GfxFamily>::blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize, CsrDependencies &csrDependencies) {
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;
UNRECOVERABLE_IF(osContext->getEngineType() != aub_stream::EngineType::ENGINE_BCS);
auto lock = obtainUniqueOwnership();
auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(sourceSize));
auto &commandStream = getCS(BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(sourceSize, csrDependencies));
auto commandStreamStart = commandStream.getUsed();
auto newTaskCount = taskCount + 1;
latestSentTaskCount = newTaskCount;
TimestampPacketHelper::programCsrDependencies<GfxFamily>(commandStream, csrDependencies);
BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBuffer(dstBuffer, srcBuffer, commandStream, sourceSize);
auto miFlushDwCmd = reinterpret_cast<MI_FLUSH_DW *>(commandStream.getSpace(sizeof(MI_FLUSH_DW)));

View File

@ -15,7 +15,7 @@ class LinearStream;
template <typename GfxFamily>
struct BlitCommandsHelper {
static size_t estimateBlitCommandsSize(uint64_t copySize);
static size_t estimateBlitCommandsSize(uint64_t copySize, CsrDependencies &csrDependencies);
static void dispatchBlitCommandsForBuffer(Buffer &dstBuffer, Buffer &srcBuffer, LinearStream &linearStream, uint64_t copySize);
static void appendBlitCommandsForBuffer(Buffer &dstBuffer, Buffer &srcBuffer, typename GfxFamily::XY_COPY_BLT &blitCmd);
};

View File

@ -10,7 +10,7 @@
namespace NEO {
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize) {
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize, CsrDependencies &csrDependencies) {
size_t numberOfBlits = 0;
uint64_t sizeToBlit = copySize;
uint64_t width = 1;
@ -30,7 +30,8 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(uint64_t copySize
numberOfBlits++;
}
size_t size = (sizeof(typename GfxFamily::XY_COPY_BLT) * numberOfBlits) +
size_t size = TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
(sizeof(typename GfxFamily::XY_COPY_BLT) * numberOfBlits) +
sizeof(typename GfxFamily::MI_FLUSH_DW) +
sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);

View File

@ -277,7 +277,9 @@ Buffer *Buffer::create(Context *context,
if (gpuCopyRequired) {
auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*pBuffer);
if (blitCommandStreamReceiver) {
blitCommandStreamReceiver->blitWithHostPtr(*pBuffer, hostPtr, size, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
CsrDependencies dependencies;
blitCommandStreamReceiver->blitWithHostPtr(*pBuffer, hostPtr, size, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr,
dependencies);
} else {
auto cmdQ = context->getSpecialQueue();
if (CL_SUCCESS != cmdQ->enqueueWriteBuffer(pBuffer, CL_TRUE, 0, size, hostPtr, nullptr, 0, nullptr, nullptr)) {

View File

@ -41,6 +41,7 @@
#include "unit_tests/mocks/mock_internal_allocation_storage.h"
#include "unit_tests/mocks/mock_kernel.h"
#include "unit_tests/mocks/mock_submissions_aggregator.h"
#include "unit_tests/mocks/mock_timestamp_container.h"
#include "unit_tests/utilities/base_object_utils.h"
#include "reg_configs_common.h"
@ -276,6 +277,7 @@ struct BcsTests : public CommandStreamReceiverHwTest {
CommandStreamReceiverHwTest::TearDown();
}
CsrDependencies csrDependencies;
std::unique_ptr<MockContext> context;
};
@ -291,13 +293,34 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman
auto expectedAlignedSize = alignUp(expectedSize + (sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts), MemoryConstants::cacheLineSize);
auto expectedNotAlignedSize = alignUp(expectedSize + (sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts), MemoryConstants::cacheLineSize);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize);
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(alignedBltSize, csrDependencies);
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies);
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
}
HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
uint32_t numberOfBlts = 1;
size_t numberNodesPerContainer = 5;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
csrDependencies.push_back(&timestamp0);
csrDependencies.push_back(&timestamp1);
size_t expectedSize = sizeof(typename FamilyType::MI_FLUSH_DW) + sizeof(typename FamilyType::MI_BATCH_BUFFER_END) +
(sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts) +
TimestampPacketHelper::getRequiredCmdStreamSize<FamilyType>(csrDependencies);
auto expectedAlignedSize = alignUp(expectedSize, MemoryConstants::cacheLineSize);
auto estimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(1, csrDependencies);
EXPECT_EQ(expectedAlignedSize, estimatedSize);
}
HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommands) {
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
@ -316,7 +339,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
uint32_t newTaskCount = 19;
csr.taskCount = newTaskCount - 1;
EXPECT_EQ(0u, csr.recursiveLockCounter.load());
csr.blitWithHostPtr(*buffer, hostPtr, bltSize, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
csr.blitWithHostPtr(*buffer, hostPtr, bltSize, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
EXPECT_EQ(newTaskCount, csr.taskCount);
EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount);
EXPECT_EQ(newTaskCount, csr.latestSentTaskCount);
@ -363,6 +386,50 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
}
}
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
cl_int retVal = CL_SUCCESS;
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
void *hostPtr = reinterpret_cast<void *>(0x12340000);
uint32_t numberOfDependencyContainers = 2;
size_t numberNodesPerContainer = 5;
MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer);
csrDependencies.push_back(&timestamp0);
csrDependencies.push_back(&timestamp1);
csr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(csr.commandStream);
auto &cmdList = hwParser.cmdList;
bool xyCopyBltCmdFound = false;
bool dependenciesFound = false;
for (auto cmdIterator = cmdList.begin(); cmdIterator != cmdList.end(); cmdIterator++) {
if (genCmdCast<typename FamilyType::XY_COPY_BLT *>(*cmdIterator)) {
xyCopyBltCmdFound = true;
continue;
}
auto miSemaphore = genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*cmdIterator);
if (miSemaphore) {
dependenciesFound = true;
EXPECT_FALSE(xyCopyBltCmdFound);
auto miAtomic = genCmdCast<typename FamilyType::MI_ATOMIC *>(*(++cmdIterator));
EXPECT_NE(nullptr, miAtomic);
for (uint32_t i = 1; i < numberOfDependencyContainers * numberNodesPerContainer; i++) {
EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*(++cmdIterator)));
EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_ATOMIC *>(*(++cmdIterator)));
}
}
}
EXPECT_TRUE(xyCopyBltCmdFound);
EXPECT_TRUE(dependenciesFound);
}
HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocationsResident) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.storeMakeResidentAllocations = true;
@ -373,7 +440,7 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations
EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled);
csr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
csr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
EXPECT_TRUE(csr.isMadeResident(buffer->getGraphicsAllocation()));
EXPECT_TRUE(csr.isMadeResident(csr.commandStream.getGraphicsAllocation()));
@ -397,7 +464,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) {
uint32_t newTaskCount = 17;
csr.taskCount = newTaskCount - 1;
csr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
csr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
EXPECT_EQ(commandStream.getGraphicsAllocation(), csr.latestFlushedBatchBuffer.commandBufferAllocation);
EXPECT_EQ(commandStreamOffset, csr.latestFlushedBatchBuffer.startOffset);
@ -442,7 +509,7 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) {
auto buffer = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
void *hostPtr = reinterpret_cast<void *>(0x12340000);
myMockCsr->blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
myMockCsr->blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed);
@ -464,13 +531,13 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) {
bcsCsr.taskCount = newTaskCount - 1;
EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled);
bcsCsr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
bcsCsr.blitWithHostPtr(*buffer, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
EXPECT_EQ(1u, mockInternalAllocationsStorage->cleanAllocationsCalled);
EXPECT_EQ(newTaskCount, mockInternalAllocationsStorage->lastCleanAllocationsTaskCount);
EXPECT_TRUE(TEMPORARY_ALLOCATION == mockInternalAllocationsStorage->lastCleanAllocationUsage);
}
HWTEST_F(BcsTests, givenHostPtrWhenBlitWithHostPtrCalledThenProgramCorrectGpuAddresses) {
HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddresses) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
cl_int retVal = CL_SUCCESS;
@ -481,7 +548,7 @@ HWTEST_F(BcsTests, givenHostPtrWhenBlitWithHostPtrCalledThenProgramCorrectGpuAdd
{
// from hostPtr
HardwareParse hwParser;
csr.blitWithHostPtr(*buffer1, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr);
csr.blitWithHostPtr(*buffer1, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies);
hwParser.parseCommands<FamilyType>(csr.commandStream);
@ -494,7 +561,7 @@ HWTEST_F(BcsTests, givenHostPtrWhenBlitWithHostPtrCalledThenProgramCorrectGpuAdd
// to hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitWithHostPtr(*buffer1, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr);
csr.blitWithHostPtr(*buffer1, hostPtr, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr, csrDependencies);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
@ -507,7 +574,7 @@ HWTEST_F(BcsTests, givenHostPtrWhenBlitWithHostPtrCalledThenProgramCorrectGpuAdd
// Buffer to Buffer
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
csr.blitBuffer(*buffer1, *buffer2, 1);
csr.blitBuffer(*buffer1, *buffer2, 1, csrDependencies);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);

View File

@ -544,7 +544,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize) override{};
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize, CsrDependencies &csrDependencies) override{};
CompletionStamp flushTask(
LinearStream &commandStream,

View File

@ -165,9 +165,9 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
return CommandStreamReceiverHw<GfxFamily>::obtainUniqueOwnership();
}
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize) override {
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize, CsrDependencies &csrDependencies) override {
blitBufferCalled++;
CommandStreamReceiverHw<GfxFamily>::blitBuffer(dstBuffer, srcBuffer, sourceSize);
CommandStreamReceiverHw<GfxFamily>::blitBuffer(dstBuffer, srcBuffer, sourceSize, csrDependencies);
}
std::atomic<uint32_t> recursiveLockCounter;

View File

@ -254,7 +254,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
}
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize) override{};
void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, uint64_t sourceSize, CsrDependencies &csrDependencies) override{};
void setOSInterface(OSInterface *osInterface);