diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 16c3a59793..bc13fced53 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1209,7 +1209,7 @@ WaitStatus CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *pri } auto waitStatus = WaitStatus::NotReady; - auto waitedOnTimestamps = waitForTimestamps(activeBcsStates, taskCount, waitStatus); + auto waitedOnTimestamps = waitForTimestamps(activeBcsStates, taskCount, waitStatus, this->timestampPacketContainer.get(), this->deferredTimestampPackets.get()); if (waitStatus == WaitStatus::GpuHang) { return WaitStatus::GpuHang; } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 4251ac82da..37fdc5b0c4 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -204,7 +204,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState); bool isWaitForTimestampsEnabled() const; - virtual bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status) = 0; + virtual bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) = 0; MOCKABLE_VIRTUAL bool isQueueBlocked(); diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 3d3eeeccc1..216ce6de07 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -427,7 +427,7 @@ class CommandQueueHw : public CommandQueue { bool isCacheFlushCommand(uint32_t commandType) const override; - bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status) override; + bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override; MOCKABLE_VIRTUAL bool isCacheFlushForBcsRequired() const; diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 5b7007be52..8040661212 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -167,14 +167,14 @@ inline bool waitForTimestampsWithinContainer(TimestampPacketContainer *container } template -bool CommandQueueHw::waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status) { +bool CommandQueueHw::waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) { using TSPacketType = typename Family::TimestampPacketType; bool waited = false; if (isWaitForTimestampsEnabled()) { - waited = waitForTimestampsWithinContainer(timestampPacketContainer.get(), getGpgpuCommandStreamReceiver(), status); + waited = waitForTimestampsWithinContainer(mainContainer, getGpgpuCommandStreamReceiver(), status); if (isOOQEnabled()) { - waitForTimestampsWithinContainer(deferredTimestampPackets.get(), getGpgpuCommandStreamReceiver(), status); + waitForTimestampsWithinContainer(deferredContainer, getGpgpuCommandStreamReceiver(), status); } if (waited) { diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 956ec73a36..3236cc99d4 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -428,7 +428,9 @@ inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) { } Range states{&bcsState, bcsState.isValid() ? 1u : 0u}; - const auto waitStatus = cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep); + auto waitStatus = WaitStatus::NotReady; + auto waitedOnTimestamps = cmdQueue->waitForTimestamps(states, taskCount.load(), waitStatus, this->timestampPacketContainer.get(), nullptr); + waitStatus = cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep, true, waitedOnTimestamps); if (waitStatus == WaitStatus::GpuHang) { return WaitStatus::GpuHang; } diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index ebf405623f..782b71dfc9 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -52,7 +52,7 @@ HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoW auto taskCount = device->getUltCommandStreamReceiver().peekLatestFlushedTaskCount(); auto status = WaitStatus::NotReady; - cmdQ.waitForTimestamps({}, 101u, status); + cmdQ.waitForTimestamps({}, 101u, status, cmdQ.timestampPacketContainer.get(), cmdQ.deferredTimestampPackets.get()); EXPECT_EQ(device->getUltCommandStreamReceiver().peekLatestFlushedTaskCount(), taskCount); } diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index 02d4d540b3..0955a3b483 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -909,6 +909,16 @@ TEST_F(InternalsEventTest, givenPassingEventWhenWaitingForEventsThenWaititingIsS EXPECT_NE(Event::executionAbortedDueToGpuHang, passingEvent.peekExecutionStatus()); } +TEST_F(InternalsEventTest, givenEventWhenWaitThenWaitForTimestampsCalled) { + MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); + MockEvent event(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + EXPECT_FALSE(cmdQ.waitForTimestampsCalled); + + event.wait(false, false); + + EXPECT_TRUE(cmdQ.waitForTimestampsCalled); +} + TEST_F(InternalsEventTest, GivenProfilingWHENMapOperationTHENTimesSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index c123735dcd..533edb0ce5 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -212,10 +212,13 @@ class MockCommandQueue : public CommandQueue { bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const override { return isCacheFlushRequired; } - bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status) override { return false; }; + bool waitForTimestamps(Range copyEnginesToWait, uint32_t taskCount, WaitStatus &status, TimestampPacketContainer *mainContainer, TimestampPacketContainer *deferredContainer) override { + waitForTimestampsCalled = true; + return false; + }; bool releaseIndirectHeapCalled = false; - + bool waitForTimestampsCalled = false; cl_int writeBufferRetValue = CL_SUCCESS; uint32_t writeBufferCounter = 0; bool writeBufferBlocking = false;