Patch summary (preamble text; everything before the first "diff --git" line
is commentary and not part of the patch payload).

Files touched:
  1. opencl/source/command_queue/enqueue_common.h
     Inside enqueueHandler (per the hunk header): when isMarkerWithProfiling
     is set, flushDependenciesForNonKernelCommand is forced to true, so the
     existing check that follows calls
     TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer
     with commandStream and csrDeps for that marker as well.

  2. opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
     Adds the test
     givenMarkerThatFollowsCopyOperationWhenItIsWaitedItHasProperDependencies.
     The test ORs CL_QUEUE_PROFILING_ENABLE into the queue properties,
     enqueues a kernel and then a buffer write, records the used size of
     getCS(0) as an offset, enqueues a marker that returns outEvent1, and runs
     expectCommand over the command list obtained from that offset onward. It
     then waits on the event and releases it.

NOTE(review): this copy of the patch looks like it lost its angle-bracket
template arguments during extraction (e.g. "static_cast *>(", and the bare
"getCmdList(" / "expectCommand(" calls next to an otherwise unused
MI_SEMAPHORE_WAIT alias). Presumably expectCommand is meant to look for
MI_SEMAPHORE_WAIT -- confirm against the upstream commit before applying.
NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy; apply with whitespace-tolerant options or
regenerate from the upstream commit.

diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h
index ccb132dacb..dbbfa5a08b 100644
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -236,6 +236,10 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency,
         }
     }
 
+    if (isMarkerWithProfiling) {
+        flushDependenciesForNonKernelCommand = true;
+    }
+
     if (flushDependenciesForNonKernelCommand) {
         TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer(commandStream, csrDeps);
     }
diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
index 700344f00b..e7e9d10507 100644
--- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
+++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp
@@ -1541,6 +1541,33 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingFo
     clReleaseEvent(outEvent2);
 }
 
+HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperationWhenItIsWaitedItHasProperDependencies) {
+    auto buffer = createBuffer(1, false);
+    int hostPtr = 0;
+
+    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
+
+    auto mockCmdQueue = static_cast *>(commandQueue.get());
+    mockCmdQueue->commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE;
+
+    cl_event outEvent1;
+    commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
+
+    commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
+
+    auto offset = mockCmdQueue->getCS(0).getUsed();
+
+    //marker needs to program semaphore
+    commandQueue->enqueueMarkerWithWaitList(0, nullptr, &outEvent1);
+
+    auto cmdListQueue = getCmdList(mockCmdQueue->getCS(0), offset);
+    expectCommand(cmdListQueue.begin(), cmdListQueue.end());
+
+    clWaitForEvents(1, &outEvent1);
+
+    clReleaseEvent(outEvent1);
+}
+
 using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
 
 HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {