From 86dc5bacc714696abf4d6a9964311aadc5eef4d3 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Thu, 25 Jun 2020 11:35:29 +0200 Subject: [PATCH] Optimize BCS flushing scheme [2/n] Change-Id: I6f1e0115b9c45f89afb86f8fd2304604243541df Signed-off-by: Bartosz Dunajski --- opencl/source/command_queue/command_queue.cpp | 16 +-- opencl/source/command_queue/command_queue.h | 3 +- .../cpu_data_transfer_handler.cpp | 2 +- opencl/source/command_queue/enqueue_common.h | 8 +- opencl/source/command_queue/finish.h | 2 +- opencl/source/event/event.cpp | 21 ++- opencl/source/event/event.h | 12 +- opencl/source/helpers/task_information.cpp | 4 +- .../command_queue/blit_enqueue_tests.cpp | 126 ++++++++++++++++++ .../command_queue/command_queue_hw_tests.cpp | 4 +- .../command_queue/command_queue_tests.cpp | 2 +- .../command_queue/enqueue_handler_tests.cpp | 12 +- .../command_queue/enqueue_kernel_2_tests.cpp | 4 +- .../event/async_events_handler_tests.cpp | 74 +++++----- .../unit_test/event/event_builder_tests.cpp | 66 ++++++--- opencl/test/unit_test/event/event_tests.cpp | 16 +-- .../unit_test/event/event_tracker_tests.cpp | 22 +-- .../unit_test/helpers/kmd_notify_tests.cpp | 19 ++- .../test/unit_test/mocks/mock_command_queue.h | 12 +- 19 files changed, 291 insertions(+), 134 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index dbbb6ee2bf..13e1090603 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -147,28 +147,28 @@ bool CommandQueue::isCompleted(uint32_t taskCount) const { return tag >= taskCount; } -void CommandQueue::waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { +void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { WAIT_ENTER() - DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", taskCountToWait); + DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag()); bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW; - getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, + getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode); - DEBUG_BREAK_IF(getHwTag() < taskCountToWait); + DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait); if (gtpinIsGTPinInitialized()) { - gtpinNotifyTaskCompletion(taskCountToWait); + gtpinNotifyTaskCompletion(gpgpuTaskCountToWait); } if (auto bcsCsr = getBcsCommandStreamReceiver()) { - bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCount, 0, false, false); - bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCount); + bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false); + bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait); } - getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(taskCountToWait); + getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait); WAIT_LEAVE() } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 7d950ee4b4..a27f7b6d93 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -215,7 +215,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool isQueueBlocked(); - MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep); + MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep); static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel, cl_uint numEventsInWaitList, @@ -299,6 +299,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { } void updateBcsTaskCount(uint32_t newBcsTaskCount) { this->bcsTaskCount = newBcsTaskCount; } + uint32_t peekBcsTaskCount() const { return bcsTaskCount; } // taskCount of last task uint32_t taskCount = 0; diff --git a/opencl/source/command_queue/cpu_data_transfer_handler.cpp b/opencl/source/command_queue/cpu_data_transfer_handler.cpp index 4c08840dc1..c7a3c9e23a 100644 --- a/opencl/source/command_queue/cpu_data_transfer_handler.cpp +++ b/opencl/source/command_queue/cpu_data_transfer_handler.cpp @@ -145,7 +145,7 @@ void *CommandQueue::cpuDataTransferHandler(TransferProperties &transferPropertie if (outEventObj) { outEventObj->setEndTimeStamp(); - outEventObj->updateTaskCount(this->taskCount); + outEventObj->updateTaskCount(this->taskCount, this->bcsTaskCount); outEventObj->flushStamp->replaceStampObject(this->flushStamp->getStampReference()); if (eventCompleted) { outEventObj->setStatus(CL_COMPLETE); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index d367bf05cb..a1dbf91ed1 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -305,7 +305,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true); if (devQueueHw->getSchedulerReturnInstance() > 0) { - waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false); + waitUntilComplete(completionStamp.taskCount, bcsTaskCount, completionStamp.flushStamp, false); this->runSchedulerSimulation(*devQueueHw, *parentKernel); } } @@ -353,7 +353,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, updateFromCompletionStamp(completionStamp); if (eventBuilder.getEvent()) { - eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, completionStamp.taskLevel, completionStamp.flushStamp); + eventBuilder.getEvent()->updateCompletionStamp(completionStamp.taskCount, bcsTaskCount, completionStamp.taskLevel, completionStamp.flushStamp); FileLoggerInstance().log(DebugManager.flags.EventsDebugEnable.get(), "updateCompletionStamp Event", eventBuilder.getEvent(), "taskLevel", eventBuilder.getEvent()->taskLevel.load()); } @@ -382,9 +382,9 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, if (blockQueue) { while (isQueueBlocked()) { } - waitUntilComplete(taskCount, flushStamp->peekStamp(), false); + waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false); } else { - waitUntilComplete(taskCount, flushStamp->peekStamp(), false); + waitUntilComplete(taskCount, bcsTaskCount, flushStamp->peekStamp(), false); if (printfHandler) { printfHandler->printEnqueueOutput(); } diff --git a/opencl/source/command_queue/finish.h b/opencl/source/command_queue/finish.h index 6e9b474fe7..98cc12998b 100644 --- a/opencl/source/command_queue/finish.h +++ b/opencl/source/command_queue/finish.h @@ -27,7 +27,7 @@ cl_int CommandQueueHw::finish() { auto flushStampToWaitFor = this->flushStamp->peekStamp(); // Stall until HW reaches CQ taskCount - waitUntilComplete(taskCountToWaitFor, flushStampToWaitFor, false); + waitUntilComplete(taskCountToWaitFor, this->bcsTaskCount, flushStampToWaitFor, false); return CL_SUCCESS; } diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index b59d0cb5dd..a43cd11d44 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -224,8 +224,9 @@ uint32_t Event::getCompletionStamp() const { return this->taskCount; } -void Event::updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp) { - this->taskCount = taskCount; +void Event::updateCompletionStamp(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp) { + this->taskCount = gpgpuTaskCount; + this->bcsTaskCount = bcsTaskCount; this->taskLevel = tasklevel; this->flushStamp->setStamp(flushStamp); } @@ -370,7 +371,7 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) { } } - cmdQueue->waitUntilComplete(taskCount.load(), flushStamp->peekStamp(), useQuickKmdSleep); + cmdQueue->waitUntilComplete(taskCount.load(), this->bcsTaskCount, flushStamp->peekStamp(), useQuickKmdSleep); updateExecutionStatus(); DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0); @@ -510,11 +511,9 @@ void Event::transitionExecutionStatus(int32_t newExecutionStatus) const { void Event::submitCommand(bool abortTasks) { std::unique_ptr cmdToProcess(cmdToSubmit.exchange(nullptr)); if (cmdToProcess.get() != nullptr) { - std::unique_lock lockCSR; - if (this->cmdQueue) { - lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); - } - if ((this->isProfilingEnabled()) && (this->cmdQueue != nullptr)) { + auto lockCSR = getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); + + if (this->isProfilingEnabled()) { if (timeStampNode) { this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*timeStampNode->getBaseGraphicsAllocation()); cmdToProcess->timestamp = timeStampNode; @@ -530,10 +529,10 @@ void Event::submitCommand(bool abortTasks) { } } auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks); - if (profilingCpuPath && this->isProfilingEnabled() && (this->cmdQueue != nullptr)) { + if (profilingCpuPath && this->isProfilingEnabled()) { setEndTimeStamp(); } - updateTaskCount(complStamp.taskCount); + updateTaskCount(complStamp.taskCount, cmdQueue->peekBcsTaskCount()); flushStamp->setStamp(complStamp.flushStamp); submittedCmd.exchange(cmdToProcess.release()); } else if (profilingCpuPath && endTimeStamp == 0) { @@ -543,7 +542,7 @@ void Event::submitCommand(bool abortTasks) { if (!this->isUserEvent() && this->eventWithoutCommand) { if (this->cmdQueue) { auto lockCSR = this->getCommandQueue()->getGpgpuCommandStreamReceiver().obtainUniqueOwnership(); - updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount()); + updateTaskCount(this->cmdQueue->getGpgpuCommandStreamReceiver().peekTaskCount(), cmdQueue->peekBcsTaskCount()); } } //make sure that task count is synchronized for events with kernels diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index 59b2327ae8..0593b65c25 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -89,7 +89,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { ~Event() override; uint32_t getCompletionStamp(void) const; - void updateCompletionStamp(uint32_t taskCount, uint32_t tasklevel, FlushStamp flushStamp); + void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp); cl_ulong getDelta(cl_ulong startTime, cl_ulong endTime); void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; } @@ -243,14 +243,15 @@ class Event : public BaseObject<_cl_event>, public IDNode { virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus); - void updateTaskCount(uint32_t taskCount) { - if (taskCount == CompletionStamp::notReady) { + void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) { + if (gpgpuTaskCount == CompletionStamp::notReady) { DEBUG_BREAK_IF(true); return; } - uint32_t prevTaskCount = this->taskCount.exchange(taskCount); - if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > taskCount)) { + this->bcsTaskCount = bcsTaskCount; + uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount); + if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) { this->taskCount = prevTaskCount; DEBUG_BREAK_IF(true); } @@ -363,6 +364,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { uint64_t startTimeStamp; uint64_t endTimeStamp; uint64_t completeTimeStamp; + uint32_t bcsTaskCount = 0; bool perfCountersEnabled; TagNode *timeStampNode = nullptr; TagNode *perfCounterNode = nullptr; diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index bfece03443..927f7a58eb 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -88,7 +88,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) { commandQueue.getDevice()); if (!memObj.isMemObjZeroCopy()) { - commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false); + commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false); if (operationType == MAP) { memObj.transferDataToHostPtr(copySize, copyOffset); } else if (!readOnly) { @@ -268,7 +268,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate } if (printfHandler) { - commandQueue.waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false); + commandQueue.waitUntilComplete(completionStamp.taskCount, commandQueue.peekBcsTaskCount(), completionStamp.flushStamp, false); printfHandler.get()->printEnqueueOutput(); } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index 2656bc9bd2..d4c1f8cc7c 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -1025,6 +1025,132 @@ HWTEST_TEMPLATED_F(BlitEnqueueFlushTests, givenDebugFlagSetWhenCheckingBcsCacheF EXPECT_TRUE(mockCommandQueue->isCacheFlushForBcsRequired()); } +using BlitEnqueueTaskCountTests = BlitEnqueueTests<1>; + +HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenWaitForSpecificBcsTaskCount) { + uint32_t gpgpuTaskCount = 123; + uint32_t bcsTaskCount = 123; + + commandQueue->waitUntilComplete(gpgpuTaskCount, bcsTaskCount, 0, false); + + EXPECT_EQ(gpgpuTaskCount, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(bcsTaskCount, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); +} + +HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { + auto buffer = createBuffer(1, false); + buffer->forceDisallowCPUCopy = true; + int hostPtr = 0; + + auto ultGpgpuCsr = static_cast *>(gpgpuCsr); + auto ultBcsCsr = static_cast *>(bcsCsr); + + cl_event outEvent1, outEvent2; + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1); + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2); + + clWaitForEvents(1, &outEvent2); + EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clWaitForEvents(1, &outEvent1); + EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clReleaseEvent(outEvent1); + clReleaseEvent(outEvent2); +} + +HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEventWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { + auto buffer = createBuffer(1, false); + buffer->forceDisallowCPUCopy = true; + int hostPtr = 0; + + auto ultGpgpuCsr = static_cast *>(gpgpuCsr); + auto ultBcsCsr = static_cast *>(bcsCsr); + + cl_event outEvent1, outEvent2; + UserEvent userEvent; + cl_event waitlist1 = &userEvent; + cl_event *waitlist2 = &outEvent1; + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, &waitlist1, &outEvent1); + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 1, waitlist2, &outEvent2); + + userEvent.setStatus(CL_COMPLETE); + + clWaitForEvents(1, &outEvent2); + EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(2u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clWaitForEvents(1, &outEvent1); + EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(1u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clReleaseEvent(outEvent1); + clReleaseEvent(outEvent2); + + EXPECT_FALSE(commandQueue->isQueueBlocked()); +} + +HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEnqueueWithoutKernelWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { + auto ultGpgpuCsr = static_cast *>(gpgpuCsr); + auto ultBcsCsr = static_cast *>(bcsCsr); + + cl_event outEvent1, outEvent2; + UserEvent userEvent; + cl_event waitlist1 = &userEvent; + cl_event *waitlist2 = &outEvent1; + + commandQueue->enqueueMarkerWithWaitList(1, &waitlist1, &outEvent1); + commandQueue->enqueueMarkerWithWaitList(1, waitlist2, &outEvent2); + + userEvent.setStatus(CL_COMPLETE); + + clWaitForEvents(1, &outEvent2); + EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clWaitForEvents(1, &outEvent1); + EXPECT_EQ(0u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clReleaseEvent(outEvent1); + clReleaseEvent(outEvent2); + + EXPECT_FALSE(commandQueue->isQueueBlocked()); +} + +HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingForCompletionThenWaitForCurrentBcsTaskCount) { + DebugManager.flags.DoCpuCopyOnWriteBuffer.set(1); + auto buffer = createBuffer(1, false); + int hostPtr = 0; + + auto ultGpgpuCsr = static_cast *>(gpgpuCsr); + auto ultBcsCsr = static_cast *>(bcsCsr); + + ultGpgpuCsr->taskCount = 1; + commandQueue->taskCount = 1; + + ultBcsCsr->taskCount = 2; + commandQueue->updateBcsTaskCount(2); + + cl_event outEvent1, outEvent2; + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent1); + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, &outEvent2); + + clWaitForEvents(1, &outEvent2); + EXPECT_EQ(1u, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(2u, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clWaitForEvents(1, &outEvent1); + EXPECT_EQ(1u, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(2u, static_cast *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); + + clReleaseEvent(outEvent1); + clReleaseEvent(outEvent2); +} + using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>; HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenDebugFlagSetWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) { diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp index a32b90dfb2..04eccb2083 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp @@ -329,7 +329,7 @@ HWTEST_F(CommandQueueHwTest, GivenEventsWaitlistOnBlockingMapBufferWillWaitForEv MockEvent(Context *ctx, uint32_t updateCountBeforeCompleted) : UserEvent(ctx), updateCount(0), updateCountBeforeCompleted(updateCountBeforeCompleted) { - this->updateTaskCount(0); + this->updateTaskCount(0, 0); this->taskLevel = 0; } @@ -959,7 +959,7 @@ HWTEST_F(CommandQueueHwTest, givenEventWithRecordedCommandWhenSubmitCommandIsCal std::thread t([&]() { while (!go) ; - neoEvent.updateTaskCount(77u); + neoEvent.updateTaskCount(77u, 0); }); neoEvent.submitCommand(false); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index f972371115..fba01e2aab 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -799,7 +799,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test { template struct MyCmdQueue : public CommandQueueHw { MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; - void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { requestedUseQuickKmdSleep = useQuickKmdSleep; waitUntilCompleteCounter++; } diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 9c65f66510..2be1f3065e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -105,9 +105,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { public: MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} - void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { waitUntilCompleteCalled = true; - CommandQueueHw::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); + CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); } void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies) override { @@ -420,10 +420,10 @@ HWTEST_F(EnqueueHandlerTest, givenEnqueueHandlerWhenAddPatchInfoCommentsForAUBDu } HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestingEnqueueWithoutGpuSubmissionThenTaskCountIsNotInherited) { - struct ExternallySynchEvent : VirtualEvent { - ExternallySynchEvent(CommandQueue *cmdQueue) { + struct ExternallySynchEvent : UserEvent { + ExternallySynchEvent() : UserEvent() { setStatus(CL_COMPLETE); - this->updateTaskCount(7); + this->updateTaskCount(7, 0); } bool isExternallySynchronized() const override { return true; @@ -432,7 +432,7 @@ HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestin auto mockCmdQ = new MockCommandQueueHw(context, pClDevice, 0); - ExternallySynchEvent synchEvent(mockCmdQ); + ExternallySynchEvent synchEvent; cl_event inEv = &synchEvent; cl_event outEv = nullptr; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 1e759b83d8..d4957f004f 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -718,9 +718,9 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest { auxTranslationDirection); } - void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { waitCalled++; - CommandQueueHw::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); + CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); } std::vector auxTranslationDirections; diff --git a/opencl/test/unit_test/event/async_events_handler_tests.cpp b/opencl/test/unit_test/event/async_events_handler_tests.cpp index d6e2988a44..c4a104eee9 100644 --- a/opencl/test/unit_test/event/async_events_handler_tests.cpp +++ b/opencl/test/unit_test/event/async_events_handler_tests.cpp @@ -7,11 +7,13 @@ #include "shared/source/helpers/timestamp_packet.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" +#include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/async_events_handler.h" #include "opencl/source/event/event.h" #include "opencl/source/event/user_event.h" #include "opencl/test/unit_test/mocks/mock_async_event_handler.h" +#include "opencl/test/unit_test/mocks/mock_command_queue.h" #include "opencl/test/unit_test/mocks/mock_context.h" #include "test.h" @@ -32,7 +34,7 @@ class AsyncEventsHandlerTests : public ::testing::Test { } void setTaskStamp(uint32_t taskLevel, uint32_t taskCount) { this->taskLevel.store(taskLevel); - this->updateTaskCount(taskCount); + this->updateTaskCount(taskCount, 0); } MOCK_METHOD2(wait, bool(bool blocking, bool quickKmdSleep)); @@ -46,36 +48,34 @@ class AsyncEventsHandlerTests : public ::testing::Test { dbgRestore.reset(new DebugManagerStateRestore()); DebugManager.flags.EnableAsyncEventsHandler.set(false); handler.reset(new MockHandler()); - context = new NiceMock(); + context = make_releaseable>(); - event1 = new NiceMock(context, nullptr, CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); - event2 = new NiceMock(context, nullptr, CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); - event3 = new NiceMock(context, nullptr, CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); - } + commandQueue = make_releaseable(context.get(), context->getDevice(0), nullptr); - void TearDown() override { - context->release(); - event1->release(); - event2->release(); - event3->release(); + *(commandQueue->getGpgpuCommandStreamReceiver().getTagAddress()) = 0; + + event1 = make_releaseable>(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); + event2 = make_releaseable>(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); + event3 = make_releaseable>(context.get(), commandQueue.get(), CL_COMMAND_BARRIER, CompletionStamp::notReady, CompletionStamp::notReady); } std::unique_ptr dbgRestore; std::unique_ptr handler; int counter = 0; - NiceMock *event1 = nullptr; - NiceMock *event2 = nullptr; - NiceMock *event3 = nullptr; - NiceMock *context = nullptr; + ReleaseableObjectPtr> context; + ReleaseableObjectPtr commandQueue; + ReleaseableObjectPtr> event1; + ReleaseableObjectPtr> event2; + ReleaseableObjectPtr> event3; }; TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutionStatus) { event1->setTaskStamp(0, 0); event2->setTaskStamp(0, 0); - handler->registerEvent(event1); - handler->registerEvent(event2); + handler->registerEvent(event1.get()); + handler->registerEvent(event2.get()); EXPECT_EQ(CL_QUEUED, event1->getExecutionStatus()); EXPECT_EQ(CL_QUEUED, event2->getExecutionStatus()); @@ -91,7 +91,7 @@ TEST_F(AsyncEventsHandlerTests, givenEventsWhenListIsProcessedThenUpdateExecutio TEST_F(AsyncEventsHandlerTests, WhenProcessIsCompletedThenRefInternalCountIsDecremented) { event1->setTaskStamp(CompletionStamp::notReady, 0); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); EXPECT_EQ(2, event1->getRefInternalCount()); handler->process(); EXPECT_TRUE(handler->peekIsListEmpty()); @@ -103,7 +103,7 @@ TEST_F(AsyncEventsHandlerTests, givenNotCalledCallbacksWhenListIsProcessedThenDo event1->setTaskStamp(CompletionStamp::notReady, 0); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &submittedCounter); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &completeCounter); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); auto expect = [&](int status, int sCounter, int cCounter, bool empty) { EXPECT_EQ(status, event1->getExecutionStatus()); @@ -160,10 +160,10 @@ TEST_F(AsyncEventsHandlerTests, givenExternallSynchronizedEventWhenListIsProcess } TEST_F(AsyncEventsHandlerTests, givenDoubleRegisteredEventWhenListIsProcessedAndNoCallbacksToProcessThenUnregister) { - event1->setTaskStamp(CompletionStamp::notReady - 1, 0); + event1->setTaskStamp(CompletionStamp::notReady - 1, CompletionStamp::notReady + 1); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); - handler->registerEvent(event1); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); + handler->registerEvent(event1.get()); handler->process(); EXPECT_EQ(CL_SUBMITTED, event1->getExecutionStatus()); @@ -178,9 +178,9 @@ TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenDestructingTh event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); - myHandler->registerEvent(event1); + myHandler->registerEvent(event1.get()); myHandler->process(); - myHandler->registerEvent(event2); + myHandler->registerEvent(event2.get()); EXPECT_FALSE(myHandler->peekIsListEmpty()); EXPECT_FALSE(myHandler->peekIsRegisterListEmpty()); @@ -202,9 +202,9 @@ TEST_F(AsyncEventsHandlerTests, givenEventsNotHandledByHandlderWhenAsyncExecutio event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); handler->process(); - handler->registerEvent(event2); + handler->registerEvent(event2.get()); EXPECT_FALSE(handler->peekIsListEmpty()); EXPECT_FALSE(handler->peekIsRegisterListEmpty()); @@ -231,15 +231,15 @@ TEST_F(AsyncEventsHandlerTests, WhenHandlerIsRegisteredThenThreadIsCreated) { event1->setTaskStamp(CompletionStamp::notReady, 0); EXPECT_FALSE(handler->openThreadCalled); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); EXPECT_TRUE(handler->openThreadCalled); } TEST_F(AsyncEventsHandlerTests, WhenProcessingAsynchronouslyThenBothThreadsCompelete) { DebugManager.flags.EnableAsyncEventsHandler.set(true); - event1->setTaskStamp(CompletionStamp::notReady, 0); - event2->setTaskStamp(CompletionStamp::notReady, 0); + event1->setTaskStamp(CompletionStamp::notReady, CompletionStamp::notReady + 1); + event2->setTaskStamp(CompletionStamp::notReady, CompletionStamp::notReady + 1); event1->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); event2->addCallback(&this->callbackFcn, CL_SUBMITTED, &counter); @@ -309,14 +309,14 @@ TEST_F(AsyncEventsHandlerTests, givenRegistredEventsWhenProcessIsCalledThenRetur event3->setTaskStamp(0, 3); event2->addCallback(&this->callbackFcn, CL_COMPLETE, &event2Counter); - handler->registerEvent(event2); + handler->registerEvent(event2.get()); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &event1Counter); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); event3->addCallback(&this->callbackFcn, CL_COMPLETE, &event3Counter); - handler->registerEvent(event3); + handler->registerEvent(event3.get()); auto sleepCandidate = handler->process(); - EXPECT_EQ(event1, sleepCandidate); + EXPECT_EQ(event1.get(), sleepCandidate); event1->setStatus(CL_COMPLETE); event2->setStatus(CL_COMPLETE); @@ -327,12 +327,12 @@ TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontR event1->setTaskStamp(0, 1); event2->setTaskStamp(0, 2); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); event2->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); - handler->registerEvent(event2); + handler->registerEvent(event2.get()); auto sleepCandidate = handler->process(); - EXPECT_EQ(event2, sleepCandidate); + EXPECT_EQ(event2.get(), sleepCandidate); event2->setStatus(CL_COMPLETE); } @@ -340,7 +340,7 @@ TEST_F(AsyncEventsHandlerTests, givenEventWithoutCallbacksWhenProcessedThenDontR TEST_F(AsyncEventsHandlerTests, givenSleepCandidateWhenProcessedThenCallWaitWithQuickKmdSleepRequest) { event1->setTaskStamp(0, 1); event1->addCallback(&this->callbackFcn, CL_COMPLETE, &counter); - handler->registerEvent(event1); + handler->registerEvent(event1.get()); handler->allowAsyncProcess.store(true); // break infinite loop after first iteartion diff --git a/opencl/test/unit_test/event/event_builder_tests.cpp b/opencl/test/unit_test/event/event_builder_tests.cpp index 2796348967..f5600cac92 100644 --- a/opencl/test/unit_test/event/event_builder_tests.cpp +++ b/opencl/test/unit_test/event/event_builder_tests.cpp @@ -24,12 +24,12 @@ namespace NEO { struct SmallEventBuilderEventMock : MockEvent { - SmallEventBuilderEventMock(int param1, float param2) - : MockEvent(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0), constructionParam1(param1), constructionParam2(param2) { + SmallEventBuilderEventMock(CommandQueue *commandQueue, int param1, float param2) + : MockEvent(commandQueue, CL_COMMAND_NDRANGE_KERNEL, 0, 0), constructionParam1(param1), constructionParam2(param2) { } - SmallEventBuilderEventMock() - : SmallEventBuilderEventMock(1, 2.0f) { + SmallEventBuilderEventMock(CommandQueue *commandQueue) + : SmallEventBuilderEventMock(commandQueue, 1, 2.0f) { } void overrideMagic(cl_long newMagic) { @@ -54,12 +54,16 @@ struct SmallEventBuilderMock : EventBuilder { }; TEST(EventBuilder, whenCreatingNewEventForwardsArgumentsToEventConstructor) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + EventBuilder eventBuilder; EXPECT_EQ(nullptr, eventBuilder.getEvent()); constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; - eventBuilder.create(constrParam1, constrParam2); + eventBuilder.create(&cmdQ, constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); auto finalizedEvent = static_cast(eventBuilder.finalizeAndRelease()); @@ -79,7 +83,8 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) { }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockCommandQueue cmdQ(nullptr, device.get(), nullptr); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; @@ -94,7 +99,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) { std::unique_ptr command = std::make_unique(cmdQ, kernelOperation, surfaces, kernel); - VirtualEvent virtualEvent; + VirtualEvent virtualEvent(&cmdQ); virtualEvent.setCommand(std::move(command)); EventBuilder eventBuilder; @@ -102,7 +107,7 @@ TEST(EventBuilder, givenVirtualEventWithCommandThenFinalizeAddChild) { constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; - eventBuilder.create(constrParam1, constrParam2); + eventBuilder.create(&cmdQ, constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); virtualEvent.taskLevel = CL_SUBMITTED; @@ -128,7 +133,8 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA }; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); - MockCommandQueue cmdQ(nullptr, device.get(), nullptr); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); MockKernelWithInternals kernel(*device); IndirectHeap *ih1 = nullptr, *ih2 = nullptr, *ih3 = nullptr; @@ -152,7 +158,7 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA constexpr int constrParam1 = 7; constexpr float constrParam2 = 13.0f; - eventBuilder.create(constrParam1, constrParam2); + eventBuilder.create(&cmdQ, constrParam1, constrParam2); Event *peekedEvent = eventBuilder.getEvent(); ASSERT_NE(nullptr, peekedEvent); virtualEvent.taskLevel = CL_SUBMITTED; @@ -164,11 +170,15 @@ TEST(EventBuilder, givenVirtualEventWithSubmittedCommandAsParentThenFinalizeNotA } TEST(EventBuilder, whenDestroyingEventBuilderImplicitFinalizeIscalled) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + SmallEventBuilderEventMock *ev = nullptr; auto parentEvent = new UserEvent; { EventBuilder eventBuilder{}; - eventBuilder.create(); + eventBuilder.create(&cmdQ); eventBuilder.addParentEvent(*parentEvent); ev = static_cast(eventBuilder.getEvent()); ASSERT_NE(nullptr, ev); @@ -181,14 +191,18 @@ TEST(EventBuilder, whenDestroyingEventBuilderImplicitFinalizeIscalled) { } TEST(EventBuilder, whenFinalizeIsCalledTwiceOnEventBuilderThenSecondRequestIsDropped) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + SmallEventBuilderEventMock *ev = nullptr; EventBuilder eventBuilder{}; - eventBuilder.create(); + eventBuilder.create(&cmdQ); ev = static_cast(eventBuilder.getEvent()); ASSERT_NE(nullptr, ev); eventBuilder.finalize(); auto *falseParentEvent = new UserEvent(); - auto *falseChildEvent = new SmallEventBuilderEventMock; + auto *falseChildEvent = new SmallEventBuilderEventMock(&cmdQ); auto numParents = ev->peekNumEventsBlockingThis(); auto numChildren = (ev->peekChildEvents() != nullptr) ? 1U + ev->peekChildEvents()->countSuccessors() : 0; eventBuilder.addParentEvent(*falseParentEvent); @@ -202,8 +216,12 @@ TEST(EventBuilder, whenFinalizeIsCalledTwiceOnEventBuilderThenSecondRequestIsDro } TEST(EventBuilder, whenFinalizeAndReleaseIsCalledThenEventBuilderReleasesReferenceToEvent) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + EventBuilder eventBuilder; - eventBuilder.create(); + eventBuilder.create(&cmdQ); auto ev = static_cast(eventBuilder.finalizeAndRelease()); ASSERT_NE(nullptr, ev); ASSERT_EQ(nullptr, eventBuilder.getEvent()); @@ -224,6 +242,10 @@ TEST(EventBuilder, whenClearIsCalledThenAllEventsAndReferencesAreDropped) { } TEST(EventBuilder, whenCParentEventsGetAddedThenTheirReferenceCountGetsIncreasedUntilFinalizeIsCalled) { + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + UserEvent evParent1; UserEvent evParent2; @@ -231,7 +253,7 @@ TEST(EventBuilder, whenCParentEventsGetAddedThenTheirReferenceCountGetsIncreased EXPECT_EQ(1, evParent2.getRefInternalCount()); EventBuilder eventBuilder; - eventBuilder.create(); + eventBuilder.create(&cmdQ); eventBuilder.addParentEvent(evParent1); EXPECT_EQ(2, evParent1.getRefInternalCount()); eventBuilder.addParentEvent(evParent2); @@ -305,7 +327,11 @@ TEST(EventBuilder, whenAddingNullptrAsNewParentEventThenItIsIgnored) { } TEST(EventBuilder, whenAddingValidEventAsNewParentEventThenItIsProperlyAddedToParentsList) { - auto event = new SmallEventBuilderEventMock; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + + auto event = new SmallEventBuilderEventMock(&cmdQ); SmallEventBuilderMock eventBuilder; eventBuilder.create>(nullptr, CL_COMMAND_MARKER, 0, 0); EXPECT_EQ(0U, eventBuilder.getParentEvents().size()); @@ -317,8 +343,12 @@ TEST(EventBuilder, whenAddingValidEventAsNewParentEventThenItIsProperlyAddedToPa } TEST(EventBuilder, whenAddingMultipleEventsAsNewParentsThenOnlyValidOnesAreInsertedIntoParentsList) { - auto event = new SmallEventBuilderEventMock; - auto invalidEvent = new SmallEventBuilderEventMock; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockContext context(device.get()); + MockCommandQueue cmdQ(&context, device.get(), nullptr); + + auto event = new SmallEventBuilderEventMock(&cmdQ); + auto invalidEvent = new SmallEventBuilderEventMock(&cmdQ); invalidEvent->overrideMagic(0); cl_event eventsList[] = {nullptr, event, invalidEvent}; SmallEventBuilderMock eventBuilder; diff --git a/opencl/test/unit_test/event/event_tests.cpp b/opencl/test/unit_test/event/event_tests.cpp index f64f34097d..9aa10a9f7f 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -87,7 +87,7 @@ TEST(Event, givenEventWithHigherTaskCountWhenLowerTaskCountIsBeingSetThenTaskCou Event *event = new Event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 4, 10); EXPECT_EQ(10u, event->peekTaskCount()); - event->updateTaskCount(8); + event->updateTaskCount(8, 0); EXPECT_EQ(10u, event->peekTaskCount()); delete event; } @@ -601,7 +601,7 @@ TEST_F(InternalsEventTest, givenBlockedKernelWithPrintfWhenSubmittedThenPrintOut TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) { auto pCmdQ = make_releaseable(mockContext, pClDevice, nullptr); - MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new MockBuffer; @@ -622,7 +622,7 @@ TEST_F(InternalsEventTest, GivenMapOperationWhenSubmittingCommandsThenTaskLevelI TEST_F(InternalsEventTest, GivenMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) { auto pCmdQ = make_releaseable(mockContext, pClDevice, nullptr); - MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; @@ -734,7 +734,7 @@ TEST_F(InternalsEventTest, GivenProfilingWHENMapOperationTHENTimesSet) { TEST_F(InternalsEventTest, GivenUnMapOperationWhenSubmittingCommandsThenTaskLevelIsIncremented) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = make_releaseable(mockContext, pClDevice, props); - MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; @@ -756,7 +756,7 @@ TEST_F(InternalsEventTest, GivenUnMapOperationWhenSubmittingCommandsThenTaskLeve TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMemObjectReference) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = std::make_unique(mockContext, pClDevice, props); - MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto buffer = new UnalignedBuffer; @@ -775,7 +775,7 @@ TEST_F(InternalsEventTest, givenBlockedMapCommandWhenSubmitIsCalledItReleasesMem TEST_F(InternalsEventTest, GivenUnMapOperationNonZeroCopyBufferWhenSubmittingCommandsThenTaskLevelIsIncremented) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0}; auto pCmdQ = std::make_unique(mockContext, pClDevice, props); - MockEvent event(nullptr, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + MockEvent event(pCmdQ.get(), CL_COMMAND_NDRANGE_KERNEL, 0, 0); auto &csr = pCmdQ->getGpgpuCommandStreamReceiver(); auto buffer = new UnalignedBuffer; @@ -1398,7 +1398,7 @@ HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWa pDevice->resetCommandStreamReceiver(csr); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); - event.updateCompletionStamp(1u, 1u, 1u); + event.updateCompletionStamp(1u, 0, 1u, 1u); EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, ::testing::_)) @@ -1426,7 +1426,7 @@ HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestT pDevice->resetCommandStreamReceiver(csr); Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); - event.updateCompletionStamp(1u, 1u, 1u); + event.updateCompletionStamp(1u, 0, 1u, 1u); EXPECT_CALL(*csr, waitForCompletionWithTimeout(::testing::_, localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, ::testing::_)) diff --git a/opencl/test/unit_test/event/event_tracker_tests.cpp b/opencl/test/unit_test/event/event_tracker_tests.cpp index 857393950a..048f438531 100644 --- a/opencl/test/unit_test/event/event_tracker_tests.cpp +++ b/opencl/test/unit_test/event/event_tracker_tests.cpp @@ -158,7 +158,7 @@ TEST(EventsTracker, whenCallDumpEdgeThenGetStringWithProperLabelOfDumpedEdge) { TEST(EventsTracker, givenEventWithTaskLevelAndCountNotReadyThenDumpingNodeWithNotReadyLabels) { UserEvent uEvent; uEvent.taskLevel = CompletionStamp::notReady; - uEvent.updateTaskCount(CompletionStamp::notReady); + uEvent.updateTaskCount(CompletionStamp::notReady, 0); std::stringstream stream; std::unordered_map map; @@ -175,7 +175,7 @@ TEST(EventsTracker, givenEventWithTaskLevelAndCountNotReadyThenDumpingNodeWithNo TEST(EventsTracker, whenCallDumpNodeFunctionThenDumpingNodeWithProperTaskLevelAndCountValues) { UserEvent uEvent; uEvent.taskLevel = 1; - uEvent.updateTaskCount(1); + uEvent.updateTaskCount(1, 0); std::stringstream stream; std::unordered_map map; @@ -232,7 +232,7 @@ TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingWithProperLabels) { MockCommandQueue cmdq; VirtualEvent vEvent(&cmdq, &ctx); vEvent.setCurrentCmdQVirtualEvent(true); - vEvent.updateTaskCount(1); + vEvent.updateTaskCount(1, 0); std::stringstream stream; std::unordered_map map; @@ -395,7 +395,7 @@ TEST(EventsTracker, givenCmdqAndItsVirtualEventThenDumpingProperGraph) { MockCommandQueue cmdq; VirtualEvent vEvent(&cmdq, &ctx); vEvent.setCurrentCmdQVirtualEvent(true); - vEvent.updateTaskCount(1); + vEvent.updateTaskCount(1, 0); std::stringstream stream; std::unordered_map map; @@ -434,9 +434,9 @@ TEST(EventsTracker, givenTwoEventsWithCommonParentEventThenDumpingProperGraph) { EXPECT_STREQ(expected.str().c_str(), stream.str().c_str()); - uEventChild1.updateCompletionStamp(0, 0, 0); - uEventChild2.updateCompletionStamp(0, 0, 0); - uEvent.updateCompletionStamp(0, 0, 0); + uEventChild1.updateCompletionStamp(0, 0, 0, 0); + uEventChild2.updateCompletionStamp(0, 0, 0, 0); + uEvent.updateCompletionStamp(0, 0, 0, 0); uEvent.setStatus(0); } @@ -611,10 +611,10 @@ TEST(EventsTracker, givenEventsWithDependenciesBetweenThemThenDumpingProperGraph EXPECT_STREQ(expected.str().c_str(), evTrackerMock.streamMock.c_str()); - uEventChild1.updateCompletionStamp(0, 0, 0); - uEventChild2.updateCompletionStamp(0, 0, 0); - uEvent2.updateCompletionStamp(0, 0, 0); - uEvent1.updateCompletionStamp(0, 0, 0); + uEventChild1.updateCompletionStamp(0, 0, 0, 0); + uEventChild2.updateCompletionStamp(0, 0, 0, 0); + uEvent2.updateCompletionStamp(0, 0, 0, 0); + uEvent1.updateCompletionStamp(0, 0, 0, 0); uEvent2.setStatus(0); uEvent1.setStatus(0); } diff --git a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp index 729f160f0b..b46cf4181b 100644 --- a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp +++ b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp @@ -98,7 +98,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTr EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) { @@ -108,7 +108,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompleti EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); - cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) { @@ -121,7 +121,7 @@ HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThen EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(false)); //we have unrecoverable for this case, this will throw. - EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false), std::exception); + EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false), std::exception); } HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) { @@ -131,7 +131,7 @@ HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTry EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0); - cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) { @@ -140,7 +140,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDi EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, false); + cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false); } HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) { @@ -149,7 +149,7 @@ HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThen EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true); + cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true); } HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) { @@ -159,7 +159,7 @@ HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSl EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true)); - cmdQ->waitUntilComplete(taskCountToWait, flushStampToWait, true); + cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true); } HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) { @@ -213,7 +213,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal auto csr = createMockCsr(); EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); cmdQ->throttle = QueueThrottle::LOW; - cmdQ->waitUntilComplete(1, 1, false); + cmdQ->waitUntilComplete(1, 0, 1, false); } HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) { @@ -222,7 +222,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, ::testing::_)).Times(1).WillOnce(::testing::Return(true)); cmdQ->throttle = QueueThrottle::LOW; - cmdQ->waitUntilComplete(1, 0, false); + cmdQ->waitUntilComplete(1, 0, 0, false); } HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) { @@ -408,4 +408,3 @@ TEST_F(KmdNotifyTests, givenEnabledKmdNotifyMechanismWhenPowerSavingModeIsSetAnd EXPECT_FALSE(timeoutEnabled); EXPECT_EQ(0, timeout); } - diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 66edde6eaa..5a263cc5a8 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -60,9 +60,9 @@ class MockCommandQueue : public CommandQueue { return writeBufferRetValue; } - void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { - latestTaskCountWaited = taskCountToWait; - return CommandQueue::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + latestTaskCountWaited = gpgpuTaskCountToWait; + return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); } cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t srcOrigin[3], @@ -272,9 +272,9 @@ class MockCommandQueueHw : public CommandQueueHw { notifyEnqueueReadImageCalled = true; } - void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { - latestTaskCountWaited = taskCountToWait; - return BaseClass::waitUntilComplete(taskCountToWait, flushStampToWait, useQuickKmdSleep); + void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + latestTaskCountWaited = gpgpuTaskCountToWait; + return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep); } bool isCacheFlushForBcsRequired() const override {