From c279647cb2dcbf2592f8dd44097c4bf45ca9bb17 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Thu, 28 Aug 2025 04:25:49 +0200 Subject: [PATCH] Revert "fix: Submit every marker with dependencies" This reverts commit d498f16fc38baf6ca6aed967d190ec5987a2b438. Signed-off-by: Compute-Runtime-Validation --- opencl/source/command_queue/command_queue.cpp | 5 --- opencl/source/command_queue/command_queue.h | 2 -- opencl/source/command_queue/enqueue_common.h | 4 +-- .../command_queue/blit_enqueue_1_tests.cpp | 8 ++--- .../command_queue/enqueue_barrier_tests.cpp | 2 -- .../command_queue/enqueue_handler_tests.cpp | 2 +- .../enqueue_map_buffer_tests.cpp | 33 ++++++------------- .../command_queue/enqueue_map_image_tests.cpp | 6 +--- .../command_queue/enqueue_marker_tests.cpp | 3 ++ .../enqueue_unmap_memobject_tests.cpp | 2 +- .../command_queue/get_size_required_tests.cpp | 31 ++++++++++++++--- .../command_queue/ooq_task_tests.cpp | 23 +++++++++++++ .../helpers/timestamp_packet_2_tests.cpp | 19 +++++++++-- .../sharings/gl/windows/gl_sharing_tests.cpp | 1 - 14 files changed, 87 insertions(+), 54 deletions(-) diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 86998d5867..f97bcc0835 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -1210,11 +1210,6 @@ bool CommandQueue::isBlockedCommandStreamRequired(uint32_t commandType, const Ev return false; } -bool CommandQueue::isDependenciesFlushForMarkerRequired(const EventsRequest &eventsRequest) const { - return this->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && - (eventsRequest.outEvent || eventsRequest.numEventsInWaitList > 0); -} - void CommandQueue::storeProperties(const cl_queue_properties *properties) { if (properties) { for (size_t i = 0; properties[i] != 0; i += 2) { diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index a16b7cf476..42e0c1bd75 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -458,8 +458,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> { virtual void obtainTaskLevelAndBlockedStatus(TaskCountType &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType){}; bool isBlockedCommandStreamRequired(uint32_t commandType, const EventsRequest &eventsRequest, bool blockedQueue, bool isMarkerWithProfiling) const; - bool isDependenciesFlushForMarkerRequired(const EventsRequest &eventsRequest) const; - MOCKABLE_VIRTUAL void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr); void storeProperties(const cl_queue_properties *properties); void processProperties(const cl_queue_properties *properties); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index eb10a48fcf..65f3b09abf 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -161,6 +161,8 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, EventBuilder eventBuilder; setupEvent(eventBuilder, event, commandType); + const bool isFlushWithPostSyncWrite = isFlushForProfilingRequired(commandType) && ((eventBuilder.getEvent() && eventBuilder.getEvent()->isProfilingEnabled()) || multiDispatchInfo.peekBuiltinOpParams().bcsSplit); + std::unique_ptr blockedCommandsData; std::unique_ptr printfHandler; TakeOwnershipWrapper> queueOwnership(*this); @@ -188,8 +190,6 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, CsrDependencies csrDeps; BlitPropertiesContainer blitPropertiesContainer; - const bool isFlushWithPostSyncWrite = isFlushForProfilingRequired(commandType) && ((eventBuilder.getEvent() && eventBuilder.getEvent()->isProfilingEnabled()) || multiDispatchInfo.peekBuiltinOpParams().bcsSplit || this->isDependenciesFlushForMarkerRequired(eventsRequest)); - if (this->context->getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForRootDevices(csrDeps, computeCommandStreamReceiver); } diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index 4cd854d7db..7c9dba2498 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -1365,11 +1365,11 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenBlockedEnqueueWithoutKernelWh userEvent.setStatus(CL_COMPLETE); clWaitForEvents(1, &outEvent2); - EXPECT_EQ(2u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clWaitForEvents(1, &outEvent1); - EXPECT_EQ(1u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); + EXPECT_EQ(0u, ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); EXPECT_EQ(0u, ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount.load()); clReleaseEvent(outEvent1); @@ -1486,7 +1486,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperatio // make sure we wait for both clWaitForEvents(1, &outEvent1); - EXPECT_NE(ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount, ultBcsCsr->taskCount); + EXPECT_EQ(ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount, ultBcsCsr->taskCount); EXPECT_EQ(ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount, ultGpgpuCsr->taskCount); clWaitForEvents(1, &outEvent1); @@ -1512,7 +1512,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperatio // make sure we wait for both clWaitForEvents(1, &outEvent2); - EXPECT_NE(ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount, ultBcsCsr->taskCount); + EXPECT_EQ(ultBcsCsr->latestWaitForCompletionWithTimeoutTaskCount, ultBcsCsr->taskCount); EXPECT_EQ(ultGpgpuCsr->latestWaitForCompletionWithTimeoutTaskCount, ultGpgpuCsr->taskCount); clWaitForEvents(1, &outEvent2); diff --git a/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp index 51b8b8211f..2bde4d1c81 100644 --- a/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_barrier_tests.cpp @@ -270,7 +270,6 @@ HWTEST_F(BarrierTest, givenBlockedCommandQueueAndEnqueueBarrierWithWaitlistRetur EXPECT_EQ(pEvent->peekTaskCount(), CompletionStamp::notReady); event2.setStatus(CL_COMPLETE); clReleaseEvent(event); - pCmdQ->finish(); } HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEventIsSignaledThenNewCommandStreamIsNotAcquired) { @@ -314,5 +313,4 @@ HWTEST_F(BarrierTest, givenEmptyCommandStreamAndBlockedBarrierCommandWhenUserEve EXPECT_GE(commandStream.getMaxAvailableSpace(), commandStream.getMaxAvailableSpace()); clReleaseEvent(event); - pCmdQ->finish(); } diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 26a29fca3b..aa8e4844e8 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -552,7 +552,7 @@ HWTEST_F(EnqueueHandlerTest, givenExternallySynchronizedParentEventWhenRequestin Event *ouputEvent = castToObject(outEv); ASSERT_NE(nullptr, ouputEvent); - EXPECT_EQ(mockCmdQ->taskCount, ouputEvent->peekTaskCount()); + EXPECT_EQ(0U, ouputEvent->peekTaskCount()); ouputEvent->release(); mockCmdQ->release(); diff --git a/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp index 7978dd0577..2b5e072765 100644 --- a/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_map_buffer_tests.cpp @@ -324,9 +324,7 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBuffer EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); - if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - expectedTaskCount++; - } + // no dc flush required at this point EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekTaskCount()); taskCount = commandStreamReceiver.peekTaskCount(); @@ -334,11 +332,7 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBuffer auto neoEvent = castToObject(mapEventReturned); // if task count of csr is higher then event task count with proper dc flushing then we are fine - auto expectedStamp = this->heaplessStateInit ? 2u : 1u; - if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - expectedStamp++; - } - EXPECT_EQ(expectedStamp, neoEvent->getCompletionStamp()); + EXPECT_EQ(this->heaplessStateInit ? 2u : 1u, neoEvent->getCompletionStamp()); // this can't be completed as task count is not reached yet EXPECT_FALSE(neoEvent->updateStatusAndCheckCompletion()); EXPECT_TRUE(CL_COMMAND_MAP_BUFFER == neoEvent->getCommandType()); @@ -375,9 +369,6 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBuffer nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); - if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - expectedTaskCount++; - } if (commandStreamReceiver.isUpdateTagFromWaitEnabled()) { EXPECT_EQ(expectedTaskCount + 1, commandStreamReceiver.peekTaskCount()); @@ -387,7 +378,6 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingReadOnlyMapBufferOnZeroCopyBuffer auto unmapEvent = castToObject(unmapEventReturned); EXPECT_TRUE(CL_COMMAND_UNMAP_MEM_OBJECT == unmapEvent->getCommandType()); - mockCmdQueue.waitUntilCompleteReturnValue = WaitStatus::ready; retVal = clWaitForEvents(1, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); @@ -484,9 +474,6 @@ TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEvent EXPECT_EQ(CL_SUCCESS, retVal); retVal = clEnqueueUnmapMemObject(pCmdQ, buffer.get(), ptrResult, 0, nullptr, &unmapEventReturned); - if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - expectedTaskCount++; - } EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekTaskCount()); @@ -500,7 +487,7 @@ TEST_F(EnqueueMapBufferTest, givenReadOnlyBufferWhenMappedOnGpuThenSetValidEvent clReleaseEvent(unmapEventReturned); } -HWTEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThenEventIsSignaledAsCompleted) { +TEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedThenEventIsSignaledAsCompleted) { cl_event eventReturned = nullptr; uint32_t tagHW = 0; *pTagMemory = tagHW; @@ -516,7 +503,7 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedT EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); - auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver(); + auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); TaskCountType taskCount = commandStreamReceiver.peekTaskCount(); auto expectedTaskCount = this->heaplessStateInit ? 1u : 0u; @@ -543,9 +530,6 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedT nullptr, &eventReturned, &retVal); - if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - expectedTaskCount++; - } EXPECT_NE(nullptr, ptrResult); EXPECT_EQ(CL_SUCCESS, retVal); @@ -561,11 +545,14 @@ HWTEST_F(EnqueueMapBufferTest, givenNonBlockingMapBufferAfterL3IsAlreadyFlushedT EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekLatestSentTaskCount()); // wait for events shouldn't call flush task - commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::ready; retVal = clWaitForEvents(1, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekLatestSentTaskCount()); + if (commandStreamReceiver.isUpdateTagFromWaitEnabled()) { + EXPECT_EQ(expectedTaskCount + 1, commandStreamReceiver.peekLatestSentTaskCount()); + } else { + EXPECT_EQ(expectedTaskCount, commandStreamReceiver.peekLatestSentTaskCount()); + } retVal = clReleaseMemObject(buffer); EXPECT_EQ(CL_SUCCESS, retVal); @@ -692,7 +679,7 @@ HWTEST_F(EnqueueMapBufferTest, GivenPtrToReturnEventWhenMappingBufferThenEventIs EXPECT_NE(nullptr, eventReturned); auto eventObject = castToObject(eventReturned); - EXPECT_EQ(pCmdQ->taskCount, eventObject->peekTaskCount()); + EXPECT_EQ(0u, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); retVal = clEnqueueUnmapMemObject( diff --git a/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp index c5dac8223e..e7fa7c806a 100644 --- a/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_map_image_tests.cpp @@ -414,7 +414,7 @@ HWTEST_F(EnqueueMapImageTest, givenReadOnlyMapWithOutEventWhenMappedThenSetEvent *pTagMemory = 5; auto &commandStreamReceiver = pCmdQ->getGpgpuCommandStreamReceiver(); - auto commandStreamReceiverTaskCountBefore = commandStreamReceiver.peekTaskCount(); + const auto commandStreamReceiverTaskCountBefore = commandStreamReceiver.peekTaskCount(); EXPECT_EQ(pCmdQ->getHeaplessStateInitEnabled() ? 2u : 1u, commandStreamReceiver.peekTaskCount()); auto ptr = pCmdQ->enqueueMapImage(image, false, mapFlags, origin, region, nullptr, nullptr, 0, @@ -433,10 +433,6 @@ HWTEST_F(EnqueueMapImageTest, givenReadOnlyMapWithOutEventWhenMappedThenSetEvent retVal = clEnqueueUnmapMemObject(pCmdQ, image, ptr, 0, nullptr, &unmapEventReturned); EXPECT_EQ(CL_SUCCESS, retVal); - if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { - commandStreamReceiverTaskCountBefore++; - } - EXPECT_EQ(commandStreamReceiverTaskCountBefore + 1, commandStreamReceiver.peekTaskCount()); auto unmapEvent = castToObject(unmapEventReturned); diff --git a/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp index e08bc80acf..3b6705d0ca 100644 --- a/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_marker_tests.cpp @@ -152,6 +152,9 @@ HWTEST_F(MarkerTest, WhenEnqueingMarkerThenReturnedEventShouldHaveEqualDepthToLa ASSERT_NE(nullptr, event); std::unique_ptr pEvent((Event *)(event)); + // Shouldn't sync to CSR + // should sync to command queue last packet + EXPECT_EQ(1u, pEvent->taskLevel); EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel); } diff --git a/opencl/test/unit_test/command_queue/enqueue_unmap_memobject_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_unmap_memobject_tests.cpp index 8b9162ea47..673463827c 100644 --- a/opencl/test/unit_test/command_queue/enqueue_unmap_memobject_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_unmap_memobject_tests.cpp @@ -134,7 +134,7 @@ HWTEST_F(EnqueueUnmapMemObjTest, WhenUnmappingMemoryObjectThenEventIsUpdated) { EXPECT_NE(nullptr, eventReturned); auto eventObject = castToObject(eventReturned); - EXPECT_EQ(pCmdQ->taskCount, eventObject->peekTaskCount()); + EXPECT_EQ(0u, eventObject->peekTaskCount()); EXPECT_TRUE(eventObject->updateStatusAndCheckCompletion()); clReleaseEvent(eventReturned); diff --git a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp index 1be195cd15..5e0b6e483e 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp @@ -52,7 +52,10 @@ HWTEST_F(GetSizeRequiredTest, WhenFinishingThenHeapsAndCommandBufferAreNotConsum EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH); } -HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAreNotConsumed) { +HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNotConsumed) { + auto &commandStream = pCmdQ->getCS(1024); + auto usedBeforeCS = commandStream.getUsed(); + Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; @@ -61,6 +64,14 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAreNotConsumed) { &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); + + size_t expectedStreamSize = 0; + if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && (!pCmdQ->getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled())) { + expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( + pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData), + MemoryConstants::cacheLineSize); + } + EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH); EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH); EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH); @@ -68,7 +79,10 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAreNotConsumed) { clReleaseEvent(eventReturned); } -HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAreNotConsumed) { +HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNotConsumed) { + auto &commandStream = pCmdQ->getCS(1024); + auto usedBeforeCS = commandStream.getUsed(); + Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15); cl_event eventBeingWaitedOn = &event1; cl_event eventReturned = nullptr; @@ -77,9 +91,16 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAreNotConsumed) { &eventBeingWaitedOn, &eventReturned); EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH); - EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH); - EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH); + + size_t expectedStreamSize = 0; + if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { + auto unalignedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + + EncodeStoreMemory::getStoreDataImmSize() + + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); + expectedStreamSize = alignUp(unalignedSize, MemoryConstants::cacheLineSize); + } + + EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); clReleaseEvent(eventReturned); } diff --git a/opencl/test/unit_test/command_queue/ooq_task_tests.cpp b/opencl/test/unit_test/command_queue/ooq_task_tests.cpp index 0c65d28fb1..d3d758cca4 100644 --- a/opencl/test/unit_test/command_queue/ooq_task_tests.cpp +++ b/opencl/test/unit_test/command_queue/ooq_task_tests.cpp @@ -358,6 +358,29 @@ HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithEventsEnabledWhenEnqueingBar EXPECT_FALSE(pCmdQ->isDcFlushRequiredOnStallingCommandsOnNextFlush()); } +HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithEventsEnabledWhenEnqueingBarrierWithWaitListWithEventThenDcFlushSet) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + if (false == commandStreamReceiver.peekTimestampPacketWriteEnabled()) { + GTEST_SKIP(); + } + DebugManagerStateRestore restorer; + debugManager.flags.SkipDcFlushOnBarrierWithoutEvents.set(1); + + const cl_uint numEventsInWaitList = 0; + const cl_event *eventWaitList = nullptr; + cl_event clEvent{}; + auto retVal = pCmdQ->enqueueBarrierWithWaitList( + numEventsInWaitList, + eventWaitList, + &clEvent); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_TRUE(pCmdQ->isStallingCommandsOnNextFlushRequired()); + EXPECT_TRUE(pCmdQ->isDcFlushRequiredOnStallingCommandsOnNextFlush()); + auto outEvent = castToObject(clEvent); + outEvent->release(); +} + HWTEST_F(OOQTaskTests, givenSkipDcFlushOnBarrierWithoutEventsDisabledWhenEnqueingBarrierWithWaitListThenDcFlushSet) { auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (false == commandStreamReceiver.peekTimestampPacketWriteEnabled()) { diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index 2f422fa496..830eaa4095 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -335,6 +335,19 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteDisabledWhenEnqueueingBa EXPECT_FALSE(cmdQ.isStallingCommandsOnNextFlushRequired()); } +HWTEST_F(TimestampPacketTests, givenBlockedQueueWhenEnqueueingBarrierThenDontRequestPipeControlOnCsrFlush) { + auto &csr = device->getUltCommandStreamReceiver(); + csr.timestampPacketWriteEnabled = true; + + MockCommandQueueHw cmdQ(context, device.get(), nullptr); + EXPECT_FALSE(cmdQ.isStallingCommandsOnNextFlushRequired()); + auto userEvent = makeReleaseable(); + cl_event waitlist[] = {userEvent.get()}; + cmdQ.enqueueBarrierWithWaitList(1, waitlist, nullptr); + EXPECT_FALSE(cmdQ.isStallingCommandsOnNextFlushRequired()); + userEvent->setStatus(CL_COMPLETE); +} + HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSizeThenAddSizeForPipeControl) { DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); @@ -564,12 +577,12 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingWithoutK auto outEvent = castToObject(clOutEvent); - EXPECT_NE(cmdQ->timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // new nodes obtained + EXPECT_EQ(cmdQ->timestampPacketContainer->peekNodes().at(0), cmdQNodes.peekNodes().at(0)); // no new nodes obtained EXPECT_EQ(1u, cmdQ->timestampPacketContainer->peekNodes().size()); auto &eventsNodes = outEvent->getTimestampPacketNodes()->peekNodes(); EXPECT_EQ(numEventsWithContainer + 1, eventsNodes.size()); // numEventsWithContainer + command queue - EXPECT_EQ(cmdQ->timestampPacketContainer->peekNodes().at(0), eventsNodes.at(0)); + EXPECT_EQ(cmdQNodes.peekNodes().at(0), eventsNodes.at(0)); EXPECT_EQ(event0.getTimestampPacketNodes()->peekNodes().at(0), eventsNodes.at(1)); EXPECT_EQ(event1.getTimestampPacketNodes()->peekNodes().at(0), eventsNodes.at(2)); @@ -630,7 +643,7 @@ HWTEST_TEMPLATED_F(TimestampPacketTestsWithMockCsrHw2, givenBlockedEnqueueWithou hwParserCmdQ.parseCommands(taskStream, 0); auto queueSemaphores = findAll(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); - auto expectedQueueSemaphoresCount = commands[i] == CL_COMMAND_MARKER ? 2u : 3u; + auto expectedQueueSemaphoresCount = 2u; if (UnitTestHelper::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) { expectedQueueSemaphoresCount += 1; } diff --git a/opencl/test/unit_test/sharings/gl/windows/gl_sharing_tests.cpp b/opencl/test/unit_test/sharings/gl/windows/gl_sharing_tests.cpp index dc281a2032..5ef7a47a65 100644 --- a/opencl/test/unit_test/sharings/gl/windows/gl_sharing_tests.cpp +++ b/opencl/test/unit_test/sharings/gl/windows/gl_sharing_tests.cpp @@ -518,7 +518,6 @@ TEST_F(GlSharingTests, givenEnabledAsyncEventsHandlerWhenAcquireGlObjectsIsCalle EXPECT_LT(CL_SUCCESS, event->peekExecutionStatus()); EXPECT_TRUE(handler->peekIsRegisterListEmpty()); - event->setStatus(CL_COMPLETE); event->release(); }