diff --git a/shared/source/command_container/cmdcontainer.cpp b/shared/source/command_container/cmdcontainer.cpp index 321f1e3ebd..78d71beb3f 100644 --- a/shared/source/command_container/cmdcontainer.cpp +++ b/shared/source/command_container/cmdcontainer.cpp @@ -296,24 +296,29 @@ void CommandContainer::handleCmdBufferAllocations(size_t startIndex) { } for (size_t i = startIndex; i < cmdBufferAllocations.size(); i++) { if (this->reusableAllocationList) { - - if (isHandleFenceCompletionRequired) { - std::vector> locks; - for (auto &engine : this->device->getMemoryManager()->getRegisteredEngines(cmdBufferAllocations[i]->getRootDeviceIndex())) { - if (cmdBufferAllocations[i]->isUsedByOsContext(engine.osContext->getContextId())) { - locks.push_back(engine.commandStreamReceiver->obtainUniqueOwnership()); - engine.commandStreamReceiver->stopDirectSubmission(false); - } - } - if (!locks.empty()) { - this->device->getMemoryManager()->handleFenceCompletion(cmdBufferAllocations[i]); + bool allocationHandled = false; + for (auto &engine : this->device->getMemoryManager()->getRegisteredEngines(cmdBufferAllocations[i]->getRootDeviceIndex())) { + auto osContextId = engine.osContext->getContextId(); + if (cmdBufferAllocations[i]->isUsedByOsContext(osContextId) && engine.commandStreamReceiver->isAnyDirectSubmissionEnabled()) { + auto lock = engine.commandStreamReceiver->obtainUniqueOwnership(); + auto taskCount = engine.commandStreamReceiver->peekTaskCount() + 1; + cmdBufferAllocations[i]->updateTaskCount(taskCount, osContextId); + cmdBufferAllocations[i]->updateResidencyTaskCount(taskCount, osContextId); + engine.commandStreamReceiver->flushTagUpdate(); + engine.commandStreamReceiver->waitForTaskCount(taskCount); + allocationHandled = true; } } + if (!allocationHandled && isHandleFenceCompletionRequired) { + this->device->getMemoryManager()->handleFenceCompletion(cmdBufferAllocations[i]); + } for (auto &engine : this->device->getMemoryManager()->getRegisteredEngines(cmdBufferAllocations[i]->getRootDeviceIndex())) { - cmdBufferAllocations[i]->releaseUsageInOsContext(engine.osContext->getContextId()); + auto osContextId = engine.osContext->getContextId(); + cmdBufferAllocations[i]->releaseUsageInOsContext(osContextId); } - reusableAllocationList->pushFrontOne(*cmdBufferAllocations[i]); + + reusableAllocationList->pushTailOne(*cmdBufferAllocations[i]); } else { this->device->getMemoryManager()->freeGraphicsMemory(cmdBufferAllocations[i]); } @@ -328,7 +333,7 @@ GraphicsAllocation *CommandContainer::obtainNextCommandBufferAllocation(bool for forceHostMemory &= this->useSecondaryCommandStream; GraphicsAllocation *cmdBufferAllocation = nullptr; if (this->reusableAllocationList) { - size_t alignedSize = getAlignedCmdBufferSize(); + const size_t alignedSize = getAlignedCmdBufferSize(); cmdBufferAllocation = this->reusableAllocationList->detachAllocation(alignedSize, nullptr, forceHostMemory, nullptr, AllocationType::commandBuffer).release(); } if (!cmdBufferAllocation) { diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 0b75a6c078..ba011945e4 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -336,7 +336,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ } SubmissionStatus flushTagUpdate() override { flushTagUpdateCalled = true; - return CommandStreamReceiverHw::flushTagUpdate(); + auto ret = SubmissionStatus::success; + if (this->callFlushTagUpdate) { + ret = CommandStreamReceiverHw::flushTagUpdate(); + } + return ret; } void initProgrammingFlags() override { @@ -505,6 +509,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ std::atomic_bool downloadAllocationsCalled = false; bool flushBatchedSubmissionsCalled = false; bool flushTagUpdateCalled = false; + bool callFlushTagUpdate = true; bool initProgrammingFlagsCalled = false; bool multiOsContextCapable = false; bool memoryCompressionEnabled = false; diff --git a/shared/test/unit_test/command_container/command_container_tests.cpp b/shared/test/unit_test/command_container/command_container_tests.cpp index ed7986f253..d92c0e1599 100644 --- a/shared/test/unit_test/command_container/command_container_tests.cpp +++ b/shared/test/unit_test/command_container/command_container_tests.cpp @@ -309,6 +309,9 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen cmdContainer->initialize(pDevice, &allocList, true, HeapSize::defaultHeapSize, false); auto &cmdBufferAllocs = cmdContainer->getCmdBufferAllocations(); auto memoryManager = static_cast(pDevice->getMemoryManager()); + auto csr = reinterpret_cast *>(memoryManager->getRegisteredEngines(0u)[0].commandStreamReceiver); + csr->directSubmissionAvailable = true; + csr->callFlushTagUpdate = false; EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 0u); EXPECT_EQ(cmdBufferAllocs.size(), 1u); EXPECT_TRUE(allocList.peekIsEmpty()); @@ -318,10 +321,9 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen auto cmdBuffer0 = cmdBufferAllocs[0]; auto cmdBuffer1 = cmdBufferAllocs[1]; - auto csr = reinterpret_cast *>(memoryManager->getRegisteredEngines(0u)[0].commandStreamReceiver); cmdContainer->reset(); - EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 0u); + EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 1u); EXPECT_EQ(cmdBufferAllocs.size(), 1u); EXPECT_EQ(cmdBufferAllocs[0], cmdBuffer0); EXPECT_FALSE(allocList.peekIsEmpty()); @@ -330,13 +332,12 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen cmdContainer->allocateNextCommandBuffer(); EXPECT_EQ(cmdBufferAllocs.size(), 2u); - cmdBuffer1->updateTaskCount(1u, 0u); cmdContainer->reset(); - EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 1u); + EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 2u); EXPECT_EQ(cmdBufferAllocs.size(), 1u); EXPECT_EQ(cmdBufferAllocs[0], cmdBuffer0); EXPECT_FALSE(allocList.peekIsEmpty()); - EXPECT_TRUE(csr->stopDirectSubmissionCalled); + EXPECT_FALSE(csr->stopDirectSubmissionCalled); EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking); cmdContainer->allocateNextCommandBuffer(); @@ -344,18 +345,18 @@ HWTEST_F(CommandContainerTest, givenCmdContainerAndHandleFenceWithAllocsListWhen EXPECT_EQ(cmdBufferAllocs[0], cmdBuffer0); EXPECT_EQ(cmdBufferAllocs[1], cmdBuffer1); EXPECT_TRUE(allocList.peekIsEmpty()); - EXPECT_TRUE(csr->stopDirectSubmissionCalled); + EXPECT_FALSE(csr->stopDirectSubmissionCalled); EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking); cmdBuffer1->updateTaskCount(1u, 0u); cmdContainer.reset(); - EXPECT_TRUE(csr->stopDirectSubmissionCalled); + EXPECT_FALSE(csr->stopDirectSubmissionCalled); EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking); csr = reinterpret_cast *>(memoryManager->getRegisteredEngines(0u)[1].commandStreamReceiver); EXPECT_FALSE(csr->stopDirectSubmissionCalled); EXPECT_FALSE(csr->stopDirectSubmissionCalledBlocking); - EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 2u); + EXPECT_EQ(memoryManager->handleFenceCompletionCalled, 3u); EXPECT_FALSE(allocList.peekIsEmpty()); cmdBuffer1->releaseUsageInOsContext(0u); allocList.freeAllGraphicsAllocations(pDevice); @@ -376,13 +377,13 @@ TEST_F(CommandContainerTest, givenReusableAllocationsAndRemoveUserFenceInCmdlist EXPECT_EQ(cmdBufferAllocs.size(), 2u); cmdContainer->reset(); - EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); + EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled); cmdContainer->allocateNextCommandBuffer(); EXPECT_EQ(cmdBufferAllocs.size(), 2u); cmdBufferAllocs[1]->updateTaskCount(2u, 0u); cmdContainer->reset(); - EXPECT_EQ(1u, memoryManager->handleFenceCompletionCalled); + EXPECT_EQ(2u, memoryManager->handleFenceCompletionCalled); cmdContainer->allocateNextCommandBuffer(); EXPECT_EQ(cmdBufferAllocs.size(), 2u); EXPECT_FALSE(cmdBufferAllocs[1]->isUsedByOsContext(0u)); @@ -390,7 +391,40 @@ TEST_F(CommandContainerTest, givenReusableAllocationsAndRemoveUserFenceInCmdlist cmdBufferAllocs[0]->updateTaskCount(5u, 0u); cmdBufferAllocs[1]->updateTaskCount(5u, 0u); cmdContainer.reset(); - EXPECT_EQ(3u, memoryManager->handleFenceCompletionCalled); + EXPECT_EQ(4u, memoryManager->handleFenceCompletionCalled); + allocList.freeAllGraphicsAllocations(pDevice); +} + +TEST_F(CommandContainerTest, givenReusableAllocationsAndRemoveUserFenceInCmdlistResetAndDestroyFlagSetWhenAllocateAndResetThenHandleFenceCompletionIsNotCalled) { + DebugManagerStateRestore restore; + debugManager.flags.RemoveUserFenceInCmdlistResetAndDestroy.set(1); + + AllocationsList allocList; + auto cmdContainer = std::make_unique(); + cmdContainer->initialize(pDevice, &allocList, HeapSize::defaultHeapSize, true, false); + auto &cmdBufferAllocs = cmdContainer->getCmdBufferAllocations(); + auto memoryManager = static_cast(pDevice->getMemoryManager()); + EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); + EXPECT_EQ(cmdBufferAllocs.size(), 1u); + cmdContainer->allocateNextCommandBuffer(); + EXPECT_EQ(cmdBufferAllocs.size(), 2u); + + cmdContainer->reset(); + EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); + cmdContainer->allocateNextCommandBuffer(); + EXPECT_EQ(cmdBufferAllocs.size(), 2u); + + cmdBufferAllocs[1]->updateTaskCount(2u, 0u); + cmdContainer->reset(); + EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); + cmdContainer->allocateNextCommandBuffer(); + EXPECT_EQ(cmdBufferAllocs.size(), 2u); + EXPECT_FALSE(cmdBufferAllocs[1]->isUsedByOsContext(0u)); + + cmdBufferAllocs[0]->updateTaskCount(5u, 0u); + cmdBufferAllocs[1]->updateTaskCount(5u, 0u); + cmdContainer.reset(); + EXPECT_EQ(0u, memoryManager->handleFenceCompletionCalled); allocList.freeAllGraphicsAllocations(pDevice); }