From b7bd3aa793da64310c2f6b2f457b95d02d67d196 Mon Sep 17 00:00:00 2001 From: Maciej Dziuban Date: Thu, 18 Apr 2019 12:37:57 +0200 Subject: [PATCH] Defer makeCoherent call to blocking calls This allows flush() not to be blocking while using TBX Resolves: NEO-3054 Change-Id: Ib3a408d4b5ec66f0848572841b3c60785fc28ad9 Signed-off-by: Maciej Dziuban --- .../command_stream_receiver.cpp | 3 - .../command_stream/command_stream_receiver.h | 2 - .../tbx_command_stream_receiver_hw.h | 11 +++- .../tbx_command_stream_receiver_hw.inl | 32 +++++++---- .../command_stream_receiver_tests.cpp | 12 ---- .../tbx_command_stream_tests.cpp | 57 ++++++++++++++++--- unit_tests/mocks/mock_tbx_csr.h | 16 +----- 7 files changed, 80 insertions(+), 53 deletions(-) diff --git a/runtime/command_stream/command_stream_receiver.cpp b/runtime/command_stream/command_stream_receiver.cpp index 1415fbd151..9013f3d2dd 100644 --- a/runtime/command_stream/command_stream_receiver.cpp +++ b/runtime/command_stream/command_stream_receiver.cpp @@ -80,7 +80,6 @@ void CommandStreamReceiver::processEviction() { void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) { if (gfxAllocation.isResident(osContext->getContextId())) { - makeCoherent(gfxAllocation); if (gfxAllocation.peekEvictable()) { this->getEvictionAllocations().push_back(&gfxAllocation); } else { @@ -92,8 +91,6 @@ void CommandStreamReceiver::makeNonResident(GraphicsAllocation &gfxAllocation) { } void CommandStreamReceiver::makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency) { - this->waitBeforeMakingNonResidentWhenRequired(); - for (auto &surface : allocationsForResidency) { this->makeNonResident(*surface); } diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 25d982b2b5..2951a07bd6 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -71,14 +71,12 @@ class CommandStreamReceiver { virtual void flushBatchedSubmissions() = 0; - virtual void makeCoherent(GraphicsAllocation &gfxAllocation){}; virtual void makeResident(GraphicsAllocation &gfxAllocation); virtual void makeNonResident(GraphicsAllocation &gfxAllocation); MOCKABLE_VIRTUAL void makeSurfacePackNonResident(ResidencyContainer &allocationsForResidency); virtual void processResidency(ResidencyContainer &allocationsForResidency) {} virtual void processEviction(); void makeResidentHostPtrAllocation(GraphicsAllocation *gfxAllocation); - virtual void waitBeforeMakingNonResidentWhenRequired() {} void ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize); diff --git a/runtime/command_stream/tbx_command_stream_receiver_hw.h b/runtime/command_stream/tbx_command_stream_receiver_hw.h index 9f1fbdab44..002f2255f8 100644 --- a/runtime/command_stream/tbx_command_stream_receiver_hw.h +++ b/runtime/command_stream/tbx_command_stream_receiver_hw.h @@ -14,6 +14,8 @@ #include "command_stream_receiver_simulated_hw.h" +#include + namespace NEO { class TbxStream; @@ -37,6 +39,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::initAdditionalMMIO; using CommandStreamReceiverSimulatedCommonHw::aubManager; @@ -45,10 +49,11 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::stream; FlushStamp flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; - void makeCoherent(GraphicsAllocation &gfxAllocation) override; + void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override; + + void processEviction() override; void processResidency(ResidencyContainer &allocationsForResidency) override; - void waitBeforeMakingNonResidentWhenRequired() override; void writeMemory(uint64_t gpuAddress, void *cpuAddress, size_t size, uint32_t memoryBank, uint64_t entryBits) override; bool writeMemory(GraphicsAllocation &gfxAllocation) override; @@ -78,6 +83,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw GGTT VA AddressMapper gttRemap; + std::set allocationsForDownload = {}; + CommandStreamReceiverType getType() override { return CommandStreamReceiverType::CSR_TBX; } diff --git a/runtime/command_stream/tbx_command_stream_receiver_hw.inl b/runtime/command_stream/tbx_command_stream_receiver_hw.inl index cff7932788..74f0ce784c 100644 --- a/runtime/command_stream/tbx_command_stream_receiver_hw.inl +++ b/runtime/command_stream/tbx_command_stream_receiver_hw.inl @@ -385,6 +385,28 @@ bool TbxCommandStreamReceiverHw::writeMemory(GraphicsAllocation &gfxA return true; } +template +void TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { + this->flushBatchedSubmissions(); + + while (*this->getTagAddress() < this->latestFlushedTaskCount) { + makeCoherent(*this->getTagAllocation()); + } + + for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { + makeCoherent(*graphicsAllocation); + } + this->allocationsForDownload.clear(); + + BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); +} + +template +void TbxCommandStreamReceiverHw::processEviction() { + this->allocationsForDownload.insert(this->getEvictionAllocations().begin(), this->getEvictionAllocations().end()); + BaseClass::processEviction(); +} + template void TbxCommandStreamReceiverHw::processResidency(ResidencyContainer &allocationsForResidency) { for (auto &gfxAllocation : allocationsForResidency) { @@ -416,16 +438,6 @@ void TbxCommandStreamReceiverHw::makeCoherent(GraphicsAllocation &gfx } } -template -void TbxCommandStreamReceiverHw::waitBeforeMakingNonResidentWhenRequired() { - auto allocation = this->getTagAllocation(); - UNRECOVERABLE_IF(allocation == nullptr); - - while (*this->getTagAddress() < this->latestFlushedTaskCount) { - this->makeCoherent(*allocation); - } -} - template uint32_t TbxCommandStreamReceiverHw::getMaskAndValueForPollForCompletion() const { return 0x100; diff --git a/unit_tests/command_stream/command_stream_receiver_tests.cpp b/unit_tests/command_stream/command_stream_receiver_tests.cpp index 882778a813..e74b705b3a 100644 --- a/unit_tests/command_stream/command_stream_receiver_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_tests.cpp @@ -392,18 +392,6 @@ TEST(CommandStreamReceiverSimpleTest, givenNullHardwareDebugModeWhenInitializeTa EXPECT_EQ(*csr->getTagAddress(), static_cast(-1)); } -TEST(CommandStreamReceiverSimpleTest, givenCSRWhenWaitBeforeMakingNonResidentWhenRequiredIsCalledWithBlockingFlagSetThenItReturnsImmediately) { - ExecutionEnvironment executionEnvironment; - MockCommandStreamReceiver csr(executionEnvironment); - uint32_t tag = 0; - MockGraphicsAllocation allocation(&tag, sizeof(tag)); - csr.latestFlushedTaskCount = 3; - csr.setTagAllocation(&allocation); - csr.waitBeforeMakingNonResidentWhenRequired(); - - EXPECT_EQ(0u, tag); -} - TEST(CommandStreamReceiverSimpleTest, givenVariousDataSetsWhenVerifyingMemoryThenCorrectValueIsReturned) { ExecutionEnvironment executionEnvironment; MockCommandStreamReceiver csr(executionEnvironment); diff --git a/unit_tests/command_stream/tbx_command_stream_tests.cpp b/unit_tests/command_stream/tbx_command_stream_tests.cpp index a9fcde2a59..1da6bb7ca8 100644 --- a/unit_tests/command_stream/tbx_command_stream_tests.cpp +++ b/unit_tests/command_stream/tbx_command_stream_tests.cpp @@ -318,21 +318,60 @@ HWTEST_F(TbxCommandStreamTests, givenDbgDeviceIdFlagIsSetWhenTbxCsrIsCreatedThen EXPECT_EQ(9u, tbxCsr->aubDeviceId); } -HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenWaitBeforeMakeNonResidentWhenRequiredIsCalledWithBlockingFlagTrueThenFunctionStallsUntilMakeCoherentUpdatesTagAddress) { - uint32_t tag = 0; - MockTbxCsrToTestWaitBeforeMakingNonResident tbxCsr(*pDevice->executionEnvironment); +HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingMakeSurfacePackNonResidentThenOnlyResidentAllocationsAreScheduledForCoherence) { + MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; + MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false); + tbxCsr.setupContext(osContext); + EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size()); + + MockGraphicsAllocation allocation1, allocation2, allocation3; + allocation1.usageInfos[0].residencyTaskCount = 1; + allocation3.usageInfos[0].residencyTaskCount = 1; + ASSERT_TRUE(allocation1.isResident(0u)); + ASSERT_FALSE(allocation2.isResident(0u)); + ASSERT_TRUE(allocation3.isResident(0u)); + + ResidencyContainer allocationsForResidency{&allocation1, &allocation2, &allocation3}; + + tbxCsr.makeSurfacePackNonResident(allocationsForResidency); + std::set expectedAllocationsForDownload = {&allocation1, &allocation3}; + EXPECT_EQ(expectedAllocationsForDownload, tbxCsr.allocationsForDownload); +} + +HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKmdNotifyFallbackThenTagAllocationAndScheduledAllocationsAreDownloaded) { + struct MockTbxCsr : TbxCommandStreamReceiverHw { + using CommandStreamReceiver::latestFlushedTaskCount; + using TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw; + void makeCoherent(GraphicsAllocation &gfxAllocation) override { + *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; + downloadedAllocations.insert(&gfxAllocation); + } + std::set downloadedAllocations; + }; + + MockTbxCsr tbxCsr{*pDevice->executionEnvironment}; + MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false); + uint32_t tag = 0u; + tbxCsr.setupContext(osContext); tbxCsr.setTagAllocation(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{false, sizeof(tag)}, &tag)); + tbxCsr.latestFlushedTaskCount = 1u; - EXPECT_FALSE(tbxCsr.makeCoherentCalled); + MockGraphicsAllocation allocation1, allocation2, allocation3; + allocation1.usageInfos[0].residencyTaskCount = 1; + allocation2.usageInfos[0].residencyTaskCount = 1; + allocation3.usageInfos[0].residencyTaskCount = 1; + ASSERT_TRUE(allocation1.isResident(0u)); + ASSERT_TRUE(allocation2.isResident(0u)); + ASSERT_TRUE(allocation3.isResident(0u)); - *tbxCsr.getTagAddress() = 3; - tbxCsr.latestFlushedTaskCount = 6; + tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3}; - tbxCsr.waitBeforeMakingNonResidentWhenRequired(); + tbxCsr.waitForTaskCountWithKmdNotifyFallback(0u, 0u, false, false); - EXPECT_TRUE(tbxCsr.makeCoherentCalled); - EXPECT_EQ(6u, tag); + std::set expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3}; + EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations); + EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size()); } HWTEST_F(TbxCommandSteamSimpleTest, whenTbxCommandStreamReceiverIsCreatedThenPPGTTAndGGTTCreatedHavePhysicalAddressAllocatorSet) { diff --git a/unit_tests/mocks/mock_tbx_csr.h b/unit_tests/mocks/mock_tbx_csr.h index 7af095ca22..25c23c2ae9 100644 --- a/unit_tests/mocks/mock_tbx_csr.h +++ b/unit_tests/mocks/mock_tbx_csr.h @@ -19,25 +19,11 @@ namespace NEO { -template -class MockTbxCsrToTestWaitBeforeMakingNonResident : public TbxCommandStreamReceiverHw { - public: - using CommandStreamReceiver::latestFlushedTaskCount; - MockTbxCsrToTestWaitBeforeMakingNonResident(ExecutionEnvironment &executionEnvironment) - : TbxCommandStreamReceiverHw(executionEnvironment) {} - - void makeCoherent(GraphicsAllocation &gfxAllocation) override { - auto tagAddress = reinterpret_cast(gfxAllocation.getUnderlyingBuffer()); - *tagAddress = this->latestFlushedTaskCount; - makeCoherentCalled = true; - } - bool makeCoherentCalled = false; -}; - template class MockTbxCsr : public TbxCommandStreamReceiverHw { public: using TbxCommandStreamReceiverHw::writeMemory; + using TbxCommandStreamReceiverHw::allocationsForDownload; MockTbxCsr(ExecutionEnvironment &executionEnvironment) : TbxCommandStreamReceiverHw(executionEnvironment) {}