diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index e6f015ec2c..fe5301cac8 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -187,15 +187,6 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState return false; } -void CommandQueue::waitForLatestTaskCount() { - TimestampPacketContainer nodesToRelease; - if (deferredTimestampPackets) { - deferredTimestampPackets->swapNodes(nodesToRelease); - } - - waitUntilComplete(taskCount, this->bcsState.taskCount, flushStamp->peekStamp(), false); -} - void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { WAIT_ENTER() @@ -918,13 +909,18 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co } } -void CommandQueue::waitUntilComplete(bool blockedQueue, PrintfHandler *printfHandler) { +void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) { if (blockedQueue) { while (isQueueBlocked()) { } } - waitForLatestTaskCount(); + TimestampPacketContainer nodesToRelease; + if (deferredTimestampPackets) { + deferredTimestampPackets->swapNodes(nodesToRelease); + } + + waitUntilComplete(taskCount, this->bcsState.taskCount, flushStamp->peekStamp(), false); if (printfHandler) { printfHandler->printEnqueueOutput(); diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 5e53cf0a1b..f971b63dce 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -217,7 +217,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool isQueueBlocked(); MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep); - MOCKABLE_VIRTUAL void waitUntilComplete(bool blockedQueue, PrintfHandler *printfHandler); + MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler); static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel, cl_uint numEventsInWaitList, @@ -360,7 +360,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const; void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo); virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0; - void waitForLatestTaskCount(); Context *context = nullptr; ClDevice *device = nullptr; diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 33fc7b028d..88278e348f 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -409,7 +409,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, commandStreamRecieverOwnership.unlock(); if (blocking) { - waitUntilComplete(blockQueue, (blockQueue ? nullptr : printfHandler.get())); + waitForAllEngines(blockQueue, (blockQueue ? nullptr : printfHandler.get())); } if (migratedMemory) { getGpgpuCommandStreamReceiver().flushBatchedSubmissions(); @@ -1249,7 +1249,7 @@ void CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDispat commandStreamRecieverOwnership.unlock(); if (blocking) { - waitUntilComplete(blockQueue, nullptr); + waitForAllEngines(blockQueue, nullptr); } } diff --git a/opencl/source/command_queue/finish.h b/opencl/source/command_queue/finish.h index ae0cecdd9e..42d8e4d13b 100644 --- a/opencl/source/command_queue/finish.h +++ b/opencl/source/command_queue/finish.h @@ -19,12 +19,8 @@ cl_int CommandQueueHw::finish() { return CL_OUT_OF_RESOURCES; } - //as long as queue is blocked we need to stall. - while (isQueueBlocked()) - ; - - // Stall until HW reaches CQ taskCount - waitForLatestTaskCount(); + // Stall until HW reaches taskCount on all its engines + waitForAllEngines(true, nullptr); return CL_SUCCESS; } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index e19621483d..e334e8de02 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1031,7 +1031,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); - commandQueue.waitUntilComplete(false, nullptr); + commandQueue.waitForAllEngines(false, nullptr); parseCommands(mockCsr->getCS(4096u)); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); @@ -1059,7 +1059,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate mockCsr->useGpuIdleImplicitFlush = false; mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch); - commandQueue.waitUntilComplete(false, nullptr); + commandQueue.waitForAllEngines(false, nullptr); parseCommands(mockCsr->getCS(4096u)); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); diff --git a/opencl/test/unit_test/program/printf_handler_tests.cpp b/opencl/test/unit_test/program/printf_handler_tests.cpp index ebab569403..9bca4d327e 100644 --- a/opencl/test/unit_test/program/printf_handler_tests.cpp +++ b/opencl/test/unit_test/program/printf_handler_tests.cpp @@ -169,7 +169,7 @@ HWTEST_F(PrintfHandlerTests, givenPrintfHandlerWhenEnqueueIsBlockedThenDontUsePr using CommandQueueHw::CommandQueueHw; using CommandQueueHw::enqueueKernel; - void waitUntilComplete(bool blockedQueue, PrintfHandler *printfHandler) override { + void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) override { waitCalled = true; printfHandlerUsedForWait = printfHandler; }