From 080b1e2f66d99a24292e553f84752763a8cde8bf Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Tue, 15 Jun 2021 15:20:41 +0000 Subject: [PATCH] Remove redundant TSP completion tracking logic Signed-off-by: Bartosz Dunajski --- opencl/source/kernel/kernel.cpp | 6 +- ...nd_stream_receiver_with_aub_dump_tests.cpp | 2 - opencl/test/unit_test/event/event_tests.cpp | 2 - .../helpers/timestamp_packet_1_tests.cpp | 86 ------------------- .../performance_counters_tests.cpp | 1 - .../utilities/tag_allocator_tests.cpp | 41 ++------- shared/source/helpers/timestamp_packet.cpp | 2 +- shared/source/helpers/timestamp_packet.h | 14 --- shared/source/utilities/tag_allocator.cpp | 1 - shared/source/utilities/tag_allocator.h | 3 - shared/source/utilities/tag_allocator.inl | 9 -- 11 files changed, 10 insertions(+), 157 deletions(-) diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 4191da400c..d39d96e76e 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1142,8 +1142,10 @@ bool Kernel::hasTunningFinished(KernelSubmissionData &submissionData) { bool Kernel::hasRunFinished(TimestampPacketContainer *timestampContainer) { for (const auto &node : timestampContainer->peekNodes()) { - if (!node->isCompleted()) { - return false; + for (uint32_t i = 0; i < node->getPacketsUsed(); i++) { + if (node->getContextEndValue(i) == 1) { + return false; + } } } return true; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp index 9ba1f88f1d..3bd48e5f74 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp @@ -297,8 +297,6 @@ struct CommandStreamReceiverTagTests : public ::testing::Test { tag->assignDataToAllTimestamps(i, zeros); } - EXPECT_TRUE(tag->isCompleted()); - bool canBeReleased = tag->canBeReleased(); allocator->returnTag(tag); diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 8c6d10f503..3f0aa973b9 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -1129,8 +1129,6 @@ HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) { ASSERT_EQ(0ULL, timeStamps->GlobalCompleteTS); ASSERT_EQ(0ULL, timeStamps->ContextCompleteTS); - EXPECT_TRUE(event->getHwTimeStampNode()->isCompleted()); - HwTimeStamps *timeStamps2 = static_cast *>(event->getHwTimeStampNode())->tagForCpuAccess; ASSERT_EQ(timeStamps, timeStamps2); } diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 1528a2c5d1..df94a35771 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -68,8 +68,6 @@ HWTEST_F(TimestampPacketTests, givenDebugModeWhereAtomicsAreNotEmittedWhenComman EXPECT_EQ(0u, mockNode.getImplicitCpuDependenciesCount()); tag.packets[0].contextEnd = 0u; tag.packets[0].globalEnd = 0u; - - EXPECT_FALSE(tag.isCompleted()); } HWTEST_F(TimestampPacketTests, givenMultipleDeviesWhenIncrementingCpuDependenciesThenIncrementMultipleTimes) { @@ -121,28 +119,6 @@ TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorr EXPECT_EQ(expectedCounterAddress, TimestampPacketHelper::getGpuDependenciesCountGpuAddress(mockNode)); } -TEST_F(TimestampPacketSimpleTests, whenContextEndTagIsNotOneThenMarkAsCompleted) { - MockTimestampPacketStorage timestampPacketStorage; - auto &packet = timestampPacketStorage.packets[0]; - timestampPacketStorage.initialize(); - - packet.contextEnd = 1; - packet.globalEnd = 1; - EXPECT_FALSE(timestampPacketStorage.isCompleted()); - - packet.contextEnd = 1; - packet.globalEnd = 0; - EXPECT_FALSE(timestampPacketStorage.isCompleted()); - - packet.contextEnd = 0; - packet.globalEnd = 1; - EXPECT_TRUE(timestampPacketStorage.isCompleted()); - - packet.contextEnd = 0; - packet.globalEnd = 0; - EXPECT_TRUE(timestampPacketStorage.isCompleted()); -} - TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMoveAllNodes) { EXPECT_TRUE(std::is_move_constructible::value); EXPECT_TRUE(std::is_move_assignable::value); @@ -178,38 +154,6 @@ TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMov EXPECT_EQ(1u, node1.returnCalls); } -TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) { - MockTimestampPacketStorage timestampPacketStorage; - auto &packet = timestampPacketStorage.packets[0]; - timestampPacketStorage.initialize(); - - EXPECT_FALSE(timestampPacketStorage.isCompleted()); - packet.contextEnd = 0; - EXPECT_TRUE(timestampPacketStorage.isCompleted()); - packet.globalEnd = 0; - EXPECT_TRUE(timestampPacketStorage.isCompleted()); -} - -TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedThenVerifyAllUsedNodes) { - MockTimestampPacketStorage timestampPacketStorage; - auto &packets = timestampPacketStorage.packets; - timestampPacketStorage.initialize(); - - timestampPacketStorage.setPacketsUsed(TimestampPacketSizeControl::preferredPacketCount - 1); - - for (uint32_t i = 0; i < timestampPacketStorage.getPacketsUsed() - 1; i++) { - packets[i].contextEnd = 0; - packets[i].globalEnd = 0; - EXPECT_FALSE(timestampPacketStorage.isCompleted()); - } - - packets[timestampPacketStorage.getPacketsUsed() - 1].globalEnd = 0; - EXPECT_FALSE(timestampPacketStorage.isCompleted()); - - packets[timestampPacketStorage.getPacketsUsed() - 1].contextEnd = 0; - EXPECT_TRUE(timestampPacketStorage.isCompleted()); -} - TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) { MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); MockMemoryManager memoryManager(executionEnvironment); @@ -339,7 +283,6 @@ HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAlloc auto tag = csr.getTimestampPacketAllocator()->getTag(); setTagToReadyState(tag); - EXPECT_TRUE(tag->isCompleted()); EXPECT_FALSE(tag->canBeReleased()); } @@ -1403,35 +1346,6 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThen cmdQ->isQueueBlocked(); } -HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenDontKeepDependencyOnPreviousNodeIfItsReady) { - device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; - - MockCommandQueueHw cmdQ(context, device.get(), nullptr); - TimestampPacketContainer previousNodes; - cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false, false); - auto firstNode = cmdQ.timestampPacketContainer->peekNodes().at(0); - setTagToReadyState(firstNode); - - cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - - HardwareParse hwParser; - hwParser.parseCommands(*cmdQ.commandStream, 0); - - uint32_t semaphoresFound = 0; - uint32_t atomicsFound = 0; - for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { - if (genCmdCast(*it)) { - semaphoresFound++; - } - if (genCmdCast(*it)) { - atomicsFound++; - } - } - uint32_t expectedSemaphoresCount = (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0); - EXPECT_EQ(expectedSemaphoresCount, semaphoresFound); - EXPECT_EQ(0u, atomicsFound); -} - HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; diff --git a/opencl/test/unit_test/os_interface/performance_counters_tests.cpp b/opencl/test/unit_test/os_interface/performance_counters_tests.cpp index 7148b87ad3..83d9f1db2c 100644 --- a/opencl/test/unit_test/os_interface/performance_counters_tests.cpp +++ b/opencl/test/unit_test/os_interface/performance_counters_tests.cpp @@ -592,7 +592,6 @@ TEST_F(PerformanceCountersMetricsLibraryTest, WhenGettingHwPerfCounterThenValidP ASSERT_NE(nullptr, perfCounter); ASSERT_EQ(0ULL, perfCounter->tagForCpuAccess->report[0]); - EXPECT_TRUE(perfCounter->isCompleted()); auto perfCounter2 = event->getHwPerfCounterNode(); ASSERT_EQ(perfCounter, perfCounter2); diff --git a/opencl/test/unit_test/utilities/tag_allocator_tests.cpp b/opencl/test/unit_test/utilities/tag_allocator_tests.cpp index 251c05955f..3dac3dcffb 100644 --- a/opencl/test/unit_test/utilities/tag_allocator_tests.cpp +++ b/opencl/test/unit_test/utilities/tag_allocator_tests.cpp @@ -34,13 +34,10 @@ struct TagAllocatorTest : public Test { assignDataToAllTimestamps(i, zeros); } setPacketsUsed(packetsUsed); - - EXPECT_TRUE(isCompleted()); } void setToNonReadyState() { packets[0].contextEnd = 1; - EXPECT_FALSE(isCompleted()); } }; @@ -99,6 +96,7 @@ class MockTagAllocator : public TagAllocator { using BaseClass::gfxAllocations; using BaseClass::populateFreeTags; using BaseClass::releaseDeferredTags; + using BaseClass::returnTagToDeferredPool; using BaseClass::rootDeviceIndices; using BaseClass::TagAllocator; using BaseClass::usedTags; @@ -386,15 +384,15 @@ TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhen EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead()); } -TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToDeferredList) { +TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToFreeList) { MockTagAllocator tagAllocator(memoryManager, 1, 1, deviceBitfield); auto node = static_cast *>(tagAllocator.getTag()); node->tagForCpuAccess->setToNonReadyState(); EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); tagAllocator.returnTag(node); - EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); - EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); + EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); + EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); } TEST_F(TagAllocatorTest, givenTagNodeWhenCompletionCheckIsDisabledThenStatusIsMarkedAsNotReady) { @@ -443,40 +441,13 @@ TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDe MockTagAllocator tagAllocator(memoryManager, 1, 1, deviceBitfield); auto node = static_cast *>(tagAllocator.getTag()); - node->tagForCpuAccess->setToNonReadyState(); - tagAllocator.returnTag(node); - node->tagForCpuAccess->setToNonReadyState(); + tagAllocator.returnTagToDeferredPool(node); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); node = static_cast *>(tagAllocator.getTag()); EXPECT_NE(nullptr, node); EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); // empty again - new pool wasnt allocated } -TEST_F(TagAllocatorTest, givenTagsOnDeferredListWhenReleasingItThenMoveReadyTagsToFreePool) { - MockTagAllocator tagAllocator(memoryManager, 2, 1, deviceBitfield); // pool with 2 tags - auto node1 = static_cast *>(tagAllocator.getTag()); - auto node2 = static_cast *>(tagAllocator.getTag()); - - node1->tagForCpuAccess->setToNonReadyState(); - node2->tagForCpuAccess->setToNonReadyState(); - tagAllocator.returnTag(node1); - tagAllocator.returnTag(node2); - - tagAllocator.releaseDeferredTags(); - EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); - EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); - - node1->tagForCpuAccess->setTagToReadyState(); - tagAllocator.releaseDeferredTags(); - EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty()); - EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); - - node2->tagForCpuAccess->setTagToReadyState(); - tagAllocator.releaseDeferredTags(); - EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty()); - EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty()); -} - TEST_F(TagAllocatorTest, givenTagAllocatorWhenGraphicsAllocationIsCreatedThenSetValidllocationType) { MockTagAllocator> timestampPacketAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(TimestampPackets), false, mockDeviceBitfield); MockTagAllocator hwTimeStampsAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwTimeStamps), false, mockDeviceBitfield); @@ -583,7 +554,6 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet EXPECT_EQ(0u, perfCounterNode.getImplicitGpuDependenciesCount()); EXPECT_ANY_THROW(perfCounterNode.getSinglePacketSize()); EXPECT_ANY_THROW(perfCounterNode.assignDataToAllTimestamps(0, nullptr)); - EXPECT_TRUE(perfCounterNode.isCompleted()); } { @@ -599,7 +569,6 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet EXPECT_EQ(0u, hwTimestampNode.getImplicitGpuDependenciesCount()); EXPECT_ANY_THROW(hwTimestampNode.getSinglePacketSize()); EXPECT_ANY_THROW(hwTimestampNode.assignDataToAllTimestamps(0, nullptr)); - EXPECT_TRUE(hwTimestampNode.isCompleted()); EXPECT_ANY_THROW(hwTimestampNode.getQueryHandleRef()); } diff --git a/shared/source/helpers/timestamp_packet.cpp b/shared/source/helpers/timestamp_packet.cpp index abd12b8977..c5d737bda2 100644 --- a/shared/source/helpers/timestamp_packet.cpp +++ b/shared/source/helpers/timestamp_packet.cpp @@ -31,7 +31,7 @@ void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) { std::vector pendingNodes; for (auto node : timestampPacketNodes) { - if (node->canBeReleased() || clearAllDependencies) { + if (clearAllDependencies) { node->returnTag(); } else { pendingNodes.push_back(node); diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index ad5ebc3725..fd38ccc703 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -49,20 +49,6 @@ class TimestampPackets : public TagTypeBase { static constexpr size_t getSinglePacketSize() { return sizeof(Packet); } - bool isCompleted() const { - if (DebugManager.flags.DisableAtomicForPostSyncs.get() == 1) { - return false; - } - - for (uint32_t i = 0; i < packetsUsed; i++) { - if (packets[i].contextEnd == 1) { - return false; - } - } - - return true; - } - void initialize() { for (auto &packet : packets) { packet.contextStart = 1u; diff --git a/shared/source/utilities/tag_allocator.cpp b/shared/source/utilities/tag_allocator.cpp index 15f08cad0b..89c979c5ae 100644 --- a/shared/source/utilities/tag_allocator.cpp +++ b/shared/source/utilities/tag_allocator.cpp @@ -41,7 +41,6 @@ void TagNodeBase::returnTag() { bool TagNodeBase::canBeReleased() const { return (!doNotReleaseNodes) && - (isCompleted()) && (getImplicitGpuDependenciesCount() == getImplicitCpuDependenciesCount()); } diff --git a/shared/source/utilities/tag_allocator.h b/shared/source/utilities/tag_allocator.h index 460951f430..0d9ccc33de 100644 --- a/shared/source/utilities/tag_allocator.h +++ b/shared/source/utilities/tag_allocator.h @@ -61,7 +61,6 @@ class TagNodeBase : public NonCopyableOrMovableClass { const TagAllocatorBase *getAllocator() const { return allocator; } // TagType specific calls - virtual bool isCompleted() const = 0; virtual void assignDataToAllTimestamps(uint32_t packetIndex, void *source) = 0; virtual size_t getGlobalStartOffset() const = 0; @@ -121,8 +120,6 @@ class TagNode : public TagNodeBase, public IDNode> { void assignDataToAllTimestamps(uint32_t packetIndex, void *source) override; - bool isCompleted() const override; - size_t getGlobalStartOffset() const override; size_t getContextStartOffset() const override; size_t getContextEndOffset() const override; diff --git a/shared/source/utilities/tag_allocator.inl b/shared/source/utilities/tag_allocator.inl index 42a3f775fe..191d28cdd2 100644 --- a/shared/source/utilities/tag_allocator.inl +++ b/shared/source/utilities/tag_allocator.inl @@ -280,15 +280,6 @@ void TagNode::assignDataToAllTimestamps(uint32_t packetIndex, void *sou } } -template -bool TagNode::isCompleted() const { - if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) { - return tagForCpuAccess->isCompleted(); - } else { - return true; - } -} - template MetricsLibraryApi::QueryHandle_1_0 &TagNode::getQueryHandleRef() const { if constexpr (TagType::getTagNodeType() == TagNodeType::HwPerfCounter) {