refactor: add debug prints for timestamp waits

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
2026-01-11 08:07:19 +08:00 · 2024-08-30 17:33:28 +00:00
parent 8a80673ca7
commit 847180dc9a
4 changed files with 122 additions and 0 deletions
--- a/opencl/source/command_queue/command_queue_hw_base.inl
+++ b/opencl/source/command_queue/command_queue_hw_base.inl
@@ -142,6 +142,7 @@ bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {

 template <typename TSPacketType>
 inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container, CommandStreamReceiver &csr, WaitStatus &status) {
+    bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
    bool waited = false;
    status = WaitStatus::notReady;

@@ -149,14 +150,23 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container
        auto lastHangCheckTime = std::chrono::high_resolution_clock::now();
        for (const auto &timestamp : container->peekNodes()) {
            for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
+                if (printWaitForCompletion) {
+                    printf("\nWaiting for TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
+                }
                while (timestamp->getContextEndValue(i) == 1) {
                    csr.downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(csr.getRootDeviceIndex()));
                    WaitUtils::waitFunctionWithPredicate<const TSPacketType>(static_cast<TSPacketType const *>(timestamp->getContextEndAddress(i)), 1u, std::not_equal_to<TSPacketType>());
                    if (csr.checkGpuHangDetected(std::chrono::high_resolution_clock::now(), lastHangCheckTime)) {
                        status = WaitStatus::gpuHang;
+                        if (printWaitForCompletion) {
+                            printf("\nWaiting for TS failed");
+                        }
                        return false;
                    }
                }
+                if (printWaitForCompletion) {
+                    printf("\nWaiting for TS completed");
+                }
                status = WaitStatus::ready;
                waited = true;
            }
--- a/opencl/source/event/event.cpp
+++ b/opencl/source/event/event.cpp
@@ -824,14 +824,25 @@ bool Event::isWaitForTimestampsEnabled() const {
 bool Event::areTimestampsCompleted() {
    if (this->timestampPacketContainer.get()) {
        if (this->isWaitForTimestampsEnabled()) {
+            bool printWaitForCompletion = debugManager.flags.LogWaitingForCompletion.get();
+
            for (const auto &timestamp : this->timestampPacketContainer->peekNodes()) {
                for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
+                    if (printWaitForCompletion) {
+                        printf("\nChecking TS 0x%" PRIx64, timestamp->getGpuAddress() + (i * timestamp->getSinglePacketSize()));
+                    }
                    this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(this->cmdQueue->getGpgpuCommandStreamReceiver().getRootDeviceIndex()));
                    if (timestamp->getContextEndValue(i) == 1) {
+                        if (printWaitForCompletion) {
+                            printf("\nTS not ready");
+                        }
                        return false;
                    }
                }
            }
+            if (printWaitForCompletion) {
+                printf("\nTS ready");
+            }
            this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocations(true);
            const auto &bcsStates = this->cmdQueue->peekActiveBcsStates();
            for (auto currentBcsIndex = 0u; currentBcsIndex < bcsStates.size(); currentBcsIndex++) {
--- a/opencl/test/unit_test/mocks/mock_event.h
+++ b/opencl/test/unit_test/mocks/mock_event.h
@@ -35,6 +35,7 @@ struct MockEvent : public BaseEventType {
    FORWARD_FUNC(submitCommand, BaseEventType);

    using BaseEventType::timeStampNode;
+    using Event::areTimestampsCompleted;
    using Event::calcProfilingData;
    using Event::cmdToSubmit;
    using Event::isWaitForTimestampsEnabled;
--- a/opencl/test/unit_test/profiling/profiling_tests.cpp
+++ b/opencl/test/unit_test/profiling/profiling_tests.cpp
@@ -579,6 +579,106 @@ HWTEST_F(ProfilingTests, givenNonKernelEnqueueWhenNonBlockedEnqueueThenSetCpuPat
    eventObj->release();
 }

+HWTEST_F(ProfilingTests, givenDebugFlagSetWhenWaitingForTimestampThenPrint) {
+    DebugManagerStateRestore restorer;
+    debugManager.flags.LogWaitingForCompletion.set(true);
+    debugManager.flags.EnableTimestampWaitForQueues.set(4);
+    debugManager.flags.EnableTimestampWaitForEvents.set(4);
+
+    using TimestampPacketType = typename FamilyType::TimestampPacketType;
+    using TimestampPacketsT = TimestampPackets<TimestampPacketType, FamilyType::timestampPacketCount>;
+
+    MockGraphicsAllocation alloc;
+    MultiGraphicsAllocation multiAlloc(1);
+    multiAlloc.addAllocation(&alloc);
+
+    struct MyMockTagNode : public TagNode<TimestampPacketsT> {
+      public:
+        using TagNode<TimestampPacketsT>::gfxAllocation;
+        MyMockTagNode(MultiGraphicsAllocation *alloc) {
+            this->gfxAllocation = alloc;
+        }
+        uint64_t getContextEndValue(uint32_t packetIndex) const override {
+            EXPECT_NE(0u, failCountdown);
+            failCountdown--;
+
+            if (failCountdown == 0) {
+                storage = 123;
+            }
+
+            return storage;
+        }
+
+        void const *getContextEndAddress(uint32_t packetIndex) const override {
+            return &storage;
+        }
+
+        void returnTag() override {}
+
+        mutable uint32_t failCountdown = 2;
+        mutable uint32_t storage = 1;
+    };
+
+    auto node = std::make_unique<MyMockTagNode>(&multiAlloc);
+
+    auto queue = std::make_unique<MockCommandQueueHw<FamilyType>>(pCmdQ->getContextPtr(), pClDevice, nullptr);
+    queue->timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
+    auto container = queue->timestampPacketContainer.get();
+
+    testing::internal::CaptureStdout();
+
+    Range<CopyEngineState> copyEngineStates;
+    WaitStatus status;
+
+    container->add(node.get());
+
+    queue->waitForTimestamps(copyEngineStates, status, container, nullptr);
+
+    std::string output = testing::internal::GetCapturedStdout();
+
+    EXPECT_NE(output.npos, output.find("Waiting for TS 0x"));
+    EXPECT_NE(output.npos, output.find("Waiting for TS completed"));
+    EXPECT_EQ(output.npos, output.find("Waiting for TS failed"));
+
+    testing::internal::CaptureStdout();
+
+    auto &csr = static_cast<UltCommandStreamReceiver<FamilyType> &>(queue->getGpgpuCommandStreamReceiver());
+    csr.forceReturnGpuHang = true;
+    node->failCountdown = 2;
+    node->storage = 1;
+    queue->waitForTimestamps(copyEngineStates, status, container, nullptr);
+
+    output = testing::internal::GetCapturedStdout();
+
+    EXPECT_NE(output.npos, output.find("Waiting for TS 0x"));
+    EXPECT_EQ(output.npos, output.find("Waiting for TS completed"));
+    EXPECT_NE(output.npos, output.find("Waiting for TS failed"));
+
+    MockEvent<Event> event(queue.get(), CL_COMMAND_READ_BUFFER, 0, 0);
+    event.timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
+
+    event.timestampPacketContainer->add(node.get());
+
+    testing::internal::CaptureStdout();
+    node->failCountdown = 2;
+    node->storage = 1;
+    event.areTimestampsCompleted();
+
+    output = testing::internal::GetCapturedStdout();
+
+    EXPECT_NE(output.npos, output.find("Checking TS 0x"));
+    EXPECT_EQ(output.npos, output.find("TS ready"));
+    EXPECT_NE(output.npos, output.find("TS not ready"));
+
+    testing::internal::CaptureStdout();
+    event.areTimestampsCompleted();
+    output = testing::internal::GetCapturedStdout();
+
+    EXPECT_NE(output.npos, output.find("Checking TS 0x"));
+    EXPECT_NE(output.npos, output.find("TS ready"));
+    EXPECT_EQ(output.npos, output.find("TS not ready"));
+}
+
 using EventProfilingTest = ProfilingTests;

 HWCMDTEST_F(IGFX_GEN8_CORE, EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEndTimestampInCompleteTimestampAndDoesntCallOsTimeMethods) {