From ef4fae39035edba7cc94799973bc700c94b74e63 Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Wed, 6 May 2020 11:52:48 +0200 Subject: [PATCH] Enable TBX mode in level zero RelatedTo: NEO-4644 Change-Id: I76913d6b7c7d978a5a90a7a574778c67283497c1 Signed-off-by: Mateusz Hoppe --- level_zero/core/source/cmdqueue/cmdqueue.cpp | 3 -- level_zero/core/source/fence/fence.cpp | 2 - .../core/source/hw_helpers/hw_helpers.h | 8 ---- .../tbx_command_stream_receiver_hw.h | 2 + .../tbx_command_stream_receiver_hw.inl | 12 +++++- .../tbx_command_stream_tests.cpp | 38 +++++++++++++------ opencl/test/unit_test/mocks/mock_tbx_csr.h | 16 ++++++++ .../command_stream/command_stream_receiver.h | 2 +- 8 files changed, 57 insertions(+), 26 deletions(-) diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 23414fc2ff..f01b961d4f 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -68,9 +68,6 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint32_t timeout) UNRECOVERABLE_IF(csr == nullptr); auto taskCountToWait = this->taskCount; - - waitForTaskCountWithKmdNotifyFallbackHelper(csr, this->taskCount, 0, false, false); - bool enableTimeout = (timeout != std::numeric_limits::max()); csr->waitForCompletionWithTimeout(enableTimeout, timeout, this->taskCount); diff --git a/level_zero/core/source/fence/fence.cpp b/level_zero/core/source/fence/fence.cpp index 5b6147bd3a..b2aff014f5 100644 --- a/level_zero/core/source/fence/fence.cpp +++ b/level_zero/core/source/fence/fence.cpp @@ -92,8 +92,6 @@ ze_result_t FenceImp::hostSynchronize(uint32_t timeout) { return ZE_RESULT_SUCCESS; } - waitForTaskCountWithKmdNotifyFallbackHelper(cmdQueue->getCsr(), cmdQueue->getTaskCount(), 0, false, false); - if (timeout == 0) { return queryStatus(); } diff --git a/level_zero/core/source/hw_helpers/hw_helpers.h b/level_zero/core/source/hw_helpers/hw_helpers.h index ae32651800..2721ac0409 100644 --- a/level_zero/core/source/hw_helpers/hw_helpers.h +++ b/level_zero/core/source/hw_helpers/hw_helpers.h @@ -15,12 +15,4 @@ namespace L0 { inline uint64_t getIntermediateCacheSize(const NEO::HardwareInfo &hwInfo) { return 0u; } - -inline void waitForTaskCountWithKmdNotifyFallbackHelper(NEO::CommandStreamReceiver *csr, - uint32_t taskCountToWait, - NEO::FlushStamp flushStampToWait, - bool useQuickKmdSleep, - bool forcePowerSavingMode) { -} - } // namespace L0 diff --git a/opencl/source/command_stream/tbx_command_stream_receiver_hw.h b/opencl/source/command_stream/tbx_command_stream_receiver_hw.h index 5cfa8133ce..f0958ecbf2 100644 --- a/opencl/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/opencl/source/command_stream/tbx_command_stream_receiver_hw.h @@ -31,6 +31,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::initAdditionalMMIO; @@ -42,6 +43,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::expectMemory(const void *gfxAddress, } template -void TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { +void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations() { this->flushBatchedSubmissions(); while (*this->getTagAddress() < this->latestFlushedTaskCount) { @@ -462,10 +462,20 @@ void TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallbac downloadAllocation(*graphicsAllocation); } this->allocationsForDownload.clear(); +} +template +void TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) { + flushSubmissionsAndDownloadAllocations(); BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); } +template +bool TbxCommandStreamReceiverHw::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) { + flushSubmissionsAndDownloadAllocations(); + return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait); +} + template void TbxCommandStreamReceiverHw::processEviction() { this->allocationsForDownload.insert(this->getEvictionAllocations().begin(), this->getEvictionAllocations().end()); diff --git a/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp b/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp index f425d141f3..2e872d023a 100644 --- a/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp +++ b/opencl/test/unit_test/command_stream/tbx_command_stream_tests.cpp @@ -396,17 +396,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingMakeSurfacePackNonResi } HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKmdNotifyFallbackThenTagAllocationAndScheduledAllocationsAreDownloaded) { - struct MockTbxCsr : TbxCommandStreamReceiverHw { - using CommandStreamReceiver::latestFlushedTaskCount; - using TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw; - void downloadAllocation(GraphicsAllocation &gfxAllocation) override { - *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; - downloadedAllocations.insert(&gfxAllocation); - } - std::set downloadedAllocations; - }; - - MockTbxCsr tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()}; + MockTbxCsrRegisterDownloadedAllocations tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()}; MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); uint32_t tag = 0u; tbxCsr.setupContext(osContext); @@ -430,6 +420,32 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForTaskCountWithKm EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size()); } +HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForCompletionWithTimeoutThenFlushIsCalledAndTagAllocationAndScheduledAllocationsAreDownloaded) { + MockTbxCsrRegisterDownloadedAllocations tbxCsr{*pDevice->executionEnvironment, pDevice->getRootDeviceIndex()}; + MockOsContext osContext(0, 1, aub_stream::ENGINE_RCS, PreemptionMode::Disabled, false, false, false); + uint32_t tag = 0u; + tbxCsr.setupContext(osContext); + tbxCsr.setTagAllocation(pDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), false, sizeof(tag)}, &tag)); + tbxCsr.latestFlushedTaskCount = 1u; + + MockGraphicsAllocation allocation1, allocation2, allocation3; + allocation1.usageInfos[0].residencyTaskCount = 1; + allocation2.usageInfos[0].residencyTaskCount = 1; + allocation3.usageInfos[0].residencyTaskCount = 1; + ASSERT_TRUE(allocation1.isResident(0u)); + ASSERT_TRUE(allocation2.isResident(0u)); + ASSERT_TRUE(allocation3.isResident(0u)); + + tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3}; + + tbxCsr.waitForCompletionWithTimeout(true, 0, 0); + + std::set expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3}; + EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations); + EXPECT_EQ(0u, tbxCsr.allocationsForDownload.size()); + EXPECT_TRUE(tbxCsr.flushBatchedSubmissionsCalled); +} + HWTEST_F(TbxCommandSteamSimpleTest, whenTbxCommandStreamReceiverIsCreatedThenPPGTTAndGGTTCreatedHavePhysicalAddressAllocatorSet) { MockTbxCsr tbxCsr(*pDevice->executionEnvironment); diff --git a/opencl/test/unit_test/mocks/mock_tbx_csr.h b/opencl/test/unit_test/mocks/mock_tbx_csr.h index 5a5d83ea87..b153e40e18 100644 --- a/opencl/test/unit_test/mocks/mock_tbx_csr.h +++ b/opencl/test/unit_test/mocks/mock_tbx_csr.h @@ -70,4 +70,20 @@ class MockTbxCsr : public TbxCommandStreamReceiverHw { bool makeCoherentCalled = false; bool dumpAllocationCalled = false; }; + +template +struct MockTbxCsrRegisterDownloadedAllocations : TbxCommandStreamReceiverHw { + using CommandStreamReceiver::latestFlushedTaskCount; + using TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw; + void downloadAllocation(GraphicsAllocation &gfxAllocation) override { + *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; + downloadedAllocations.insert(&gfxAllocation); + } + bool flushBatchedSubmissions() override { + flushBatchedSubmissionsCalled = true; + return true; + } + std::set downloadedAllocations; + bool flushBatchedSubmissionsCalled = false; +}; } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index c753695c13..2f60008c3e 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -129,7 +129,7 @@ class CommandStreamReceiver { bool isStallingPipeControlOnNextFlushRequired() const { return stallingPipeControlOnNextFlushRequired; } virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0; - MOCKABLE_VIRTUAL bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait); + virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait); virtual void downloadAllocation(GraphicsAllocation &gfxAllocation){}; void setSamplerCacheFlushRequired(SamplerCacheFlushState value) { this->samplerCacheFlushRequired = value; }