From a285712cc403bd1f99a5f0c42cf0055a5c40f82e Mon Sep 17 00:00:00 2001 From: "Jobczyk, Lukasz" Date: Tue, 29 Mar 2022 15:31:51 +0000 Subject: [PATCH] Add missing download allocation calls Signed-off-by: Jobczyk, Lukasz Signed-off-by: Lukasz Jobczyk --- .../command_stream_receiver.cpp | 16 +++++++++++- .../command_stream/command_stream_receiver.h | 4 ++- .../tbx_command_stream_receiver_hw.h | 2 +- .../tbx_command_stream_receiver_hw.inl | 15 +++++++---- .../libult/ult_command_stream_receiver.h | 10 +++++-- shared/test/common/mocks/mock_tbx_csr.h | 26 +++++++++++++++---- .../command_stream_receiver_tests.cpp | 1 + 7 files changed, 59 insertions(+), 15 deletions(-) diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 195d929dfd..2c1cc505f3 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -169,6 +169,7 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) { auto address = getTagAddress(); if (address) { + this->downloadTagAllocation(); return baseWaitFunction(address, WaitParams{false, false, 0}, requiredTaskCount); } @@ -277,7 +278,7 @@ void CommandStreamReceiver::cleanupResources() { } if (tagsMultiAllocation) { - //Null tag address to prevent waiting for tag update when freeing it + // Null tag address to prevent waiting for tag update when freeing it tagAllocation = nullptr; tagAddress = nullptr; DEBUG_BREAK_IF(tagAllocation != nullptr); @@ -461,6 +462,12 @@ AubSubCaptureStatus CommandStreamReceiver::checkAndActivateAubSubCapture(const s void CommandStreamReceiver::addAubComment(const char *comment) {} +void CommandStreamReceiver::downloadAllocation(GraphicsAllocation &gfxAllocation) { + if (this->downloadAllocationImpl) { + this->downloadAllocationImpl(gfxAllocation); + } +} + void CommandStreamReceiver::startControllingDirectSubmissions() { auto controller = this->executionEnvironment.directSubmissionController.get(); if (controller) { @@ -805,7 +812,14 @@ bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() { return false; } +void CommandStreamReceiver::downloadTagAllocation() { + if (this->getTagAllocation()) { + this->downloadAllocation(*this->getTagAllocation()); + } +} + bool CommandStreamReceiver::testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) { + this->downloadTagAllocation(); for (uint32_t i = 0; i < activePartitions; i++) { if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { return false; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 6c1ba212aa..b55ea311cf 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -235,7 +235,7 @@ class CommandStreamReceiver { return scratchSpaceController.get(); } - virtual void downloadAllocation(GraphicsAllocation &gfxAllocation){}; + void downloadAllocation(GraphicsAllocation &gfxAllocation); void registerInstructionCacheFlush() { auto mutex = obtainUniqueOwnership(); @@ -324,6 +324,7 @@ class CommandStreamReceiver { void printDeviceIndex(); void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation); bool checkImplicitFlushForGpuIdle(); + void downloadTagAllocation(); MOCKABLE_VIRTUAL std::unique_lock obtainHostPtrSurfaceCreationLock(); std::unique_ptr flushStamp; @@ -356,6 +357,7 @@ class CommandStreamReceiver { SpinLock debugPauseStateLock; static void *asyncDebugBreakConfirmation(void *arg); std::function debugConfirmationFunction = []() { std::cin.get(); }; + std::function downloadAllocationImpl; GraphicsAllocation *tagAllocation = nullptr; GraphicsAllocation *globalFenceAllocation = nullptr; diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h index 96a362dd55..a03b193a22 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h @@ -46,7 +46,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::TbxCommandStreamReceiverHw(ExecutionEnvir ? this->peekHwInfo().capabilityTable.aubDeviceId : static_cast(debugDeviceId); this->stream = &tbxStream; + this->downloadAllocationImpl = [this](GraphicsAllocation &graphicsAllocation) { + this->downloadAllocationTbx(graphicsAllocation); + }; } template TbxCommandStreamReceiverHw::~TbxCommandStreamReceiverHw() { + this->downloadAllocationImpl = nullptr; + if (streamInitialized) { tbxStream.close(); } @@ -480,14 +485,14 @@ void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocatio volatile uint32_t *pollAddress = this->getTagAddress(); for (uint32_t i = 0; i < this->activePartitions; i++) { while (*pollAddress < this->latestFlushedTaskCount) { - downloadAllocation(*this->getTagAllocation()); + this->downloadAllocation(*this->getTagAllocation()); } pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); } auto lockCSR = this->obtainUniqueOwnership(); for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { - downloadAllocation(*graphicsAllocation); + this->downloadAllocation(*graphicsAllocation); } this->allocationsForDownload.clear(); } @@ -527,7 +532,7 @@ void TbxCommandStreamReceiverHw::processResidency(const ResidencyCont } template -void TbxCommandStreamReceiverHw::downloadAllocation(GraphicsAllocation &gfxAllocation) { +void TbxCommandStreamReceiverHw::downloadAllocationTbx(GraphicsAllocation &gfxAllocation) { if (hardwareContextController) { hardwareContextController->readMemory(gfxAllocation.getGpuAddress(), gfxAllocation.getUnderlyingBuffer(), gfxAllocation.getUnderlyingBufferSize(), this->getMemoryBank(&gfxAllocation), MemoryConstants::pageSize64k); @@ -552,13 +557,13 @@ void TbxCommandStreamReceiverHw::downloadAllocations() { volatile uint32_t *pollAddress = this->getTagAddress(); for (uint32_t i = 0; i < this->activePartitions; i++) { while (*pollAddress < this->latestFlushedTaskCount) { - downloadAllocation(*this->getTagAllocation()); + this->downloadAllocation(*this->getTagAllocation()); } pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); } auto lockCSR = this->obtainUniqueOwnership(); for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { - downloadAllocation(*graphicsAllocation); + this->downloadAllocation(*graphicsAllocation); } this->allocationsForDownload.clear(); } diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index db2a47d89b..55dbe10723 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -128,6 +128,12 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ const DeviceBitfield deviceBitfield) : BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), recursiveLockCounter(0), recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) { + this->downloadAllocationImpl = [this](GraphicsAllocation &graphicsAllocation) { + this->downloadAllocationUlt(graphicsAllocation); + }; + } + ~UltCommandStreamReceiver() { + this->downloadAllocationImpl = nullptr; } static CommandStreamReceiver *create(bool withAubDump, ExecutionEnvironment &executionEnvironment, @@ -171,7 +177,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ downloadAllocationCalled = true; } - void downloadAllocation(GraphicsAllocation &gfxAllocation) override { + void downloadAllocationUlt(GraphicsAllocation &gfxAllocation) { downloadAllocationCalled = true; } @@ -339,7 +345,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ bool recordFlusheBatchBuffer = false; bool checkAndActivateAubSubCaptureCalled = false; bool addAubCommentCalled = false; - bool downloadAllocationCalled = false; + std::atomic_bool downloadAllocationCalled = false; bool flushBatchedSubmissionsCalled = false; bool initProgrammingFlagsCalled = false; bool multiOsContextCapable = false; diff --git a/shared/test/common/mocks/mock_tbx_csr.h b/shared/test/common/mocks/mock_tbx_csr.h index 6555c3df22..0726b4cbab 100644 --- a/shared/test/common/mocks/mock_tbx_csr.h +++ b/shared/test/common/mocks/mock_tbx_csr.h @@ -25,7 +25,14 @@ class MockTbxCsr : public TbxCommandStreamReceiverHw { using TbxCommandStreamReceiverHw::writeMemory; using TbxCommandStreamReceiverHw::allocationsForDownload; MockTbxCsr(ExecutionEnvironment &executionEnvironment, const DeviceBitfield deviceBitfield) - : TbxCommandStreamReceiverHw(executionEnvironment, 0, deviceBitfield) {} + : TbxCommandStreamReceiverHw(executionEnvironment, 0, deviceBitfield) { + this->downloadAllocationImpl = [this](GraphicsAllocation &gfxAllocation) { + this->downloadAllocationTbxMock(gfxAllocation); + }; + } + ~MockTbxCsr() { + this->downloadAllocationImpl = nullptr; + } void initializeEngine() override { TbxCommandStreamReceiverHw::initializeEngine(); @@ -50,8 +57,8 @@ class MockTbxCsr : public TbxCommandStreamReceiverHw { TbxCommandStreamReceiverHw::pollForCompletion(); pollForCompletionCalled = true; } - void downloadAllocation(GraphicsAllocation &gfxAllocation) override { - TbxCommandStreamReceiverHw::downloadAllocation(gfxAllocation); + void downloadAllocationTbxMock(GraphicsAllocation &gfxAllocation) { + TbxCommandStreamReceiverHw::downloadAllocationTbx(gfxAllocation); makeCoherentCalled = true; } void dumpAllocation(GraphicsAllocation &gfxAllocation) override { @@ -74,9 +81,18 @@ template struct MockTbxCsrRegisterDownloadedAllocations : TbxCommandStreamReceiverHw { using CommandStreamReceiver::latestFlushedTaskCount; using CommandStreamReceiver::tagsMultiAllocation; - using TbxCommandStreamReceiverHw::TbxCommandStreamReceiverHw; using TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations; - void downloadAllocation(GraphicsAllocation &gfxAllocation) override { + + MockTbxCsrRegisterDownloadedAllocations(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) + : TbxCommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) { + this->downloadAllocationImpl = [this](GraphicsAllocation &gfxAllocation) { + this->downloadAllocationTbxMock(gfxAllocation); + }; + } + ~MockTbxCsrRegisterDownloadedAllocations() { + this->downloadAllocationImpl = nullptr; + } + void downloadAllocationTbxMock(GraphicsAllocation &gfxAllocation) { *reinterpret_cast(CommandStreamReceiver::getTagAllocation()->getUnderlyingBuffer()) = this->latestFlushedTaskCount; downloadedAllocations.insert(&gfxAllocation); } diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index bd05ff2081..c504b91244 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -301,6 +301,7 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpu constexpr auto taskCountToWait = 1; const auto waitStatus = csr.waitForTaskCount(taskCountToWait); EXPECT_EQ(WaitStatus::GpuHang, waitStatus); + EXPECT_TRUE(csr.downloadAllocationCalled); } HWTEST_F(CommandStreamReceiverTest, givenGpuHangAndNonEmptyAllocationsListWhenCallingWaitForTaskCountAndCleanAllocationListThenWaitIsCalledAndGpuHangIsReturned) {