diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 2ada8fc977..870b77bd17 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -189,7 +189,8 @@ bool CommandQueueHw::waitForTimestamps(std::span copyEn if (isWaitForTimestampsEnabled()) { { - TakeOwnershipWrapper queueOwnership(*this); + // mainContainer == this->timestampPacketContainer.get() means wait is called from command queue on its TS. Lock is needed, because another enqueue might generate TS and modify container + TakeOwnershipWrapper queueOwnership(*this, mainContainer == this->timestampPacketContainer.get()); waited = waitForTimestampsWithinContainer(mainContainer, getGpgpuCommandStreamReceiver(), status); } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index de78aecb37..95c9079ac6 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -198,7 +198,7 @@ cl_int CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, const auto &hwInfo = this->getDevice().getHardwareInfo(); auto &productHelper = getDevice().getProductHelper(); bool canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = false; - bool isNonStallingIoqBarrier = isFlushForProfilingRequired(commandType) && !isOOQEnabled() && (debugManager.flags.OptimizeIoqBarriersHandling.get() != 0); + bool isNonStallingIoqBarrier = commandType == CL_COMMAND_BARRIER && !isOOQEnabled() && (debugManager.flags.OptimizeIoqBarriersHandling.get() != 0); const bool isNonStallingIoqBarrierWithDependencies = isNonStallingIoqBarrier && (eventsRequest.numEventsInWaitList > 0); if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { @@ -1550,17 +1550,17 @@ cl_int CommandQueueHw::enqueueBlit(const MultiDispatchInfo &multiDisp if (deferredMultiRootSyncNodes.get()) { 
csrDeps.copyRootDeviceSyncNodesToNewContainer(*deferredMultiRootSyncNodes); } - if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) { - commandStreamReceiverOwnership.unlock(); - } - queueOwnership.unlock(); if (migratedMemory) { bcsCsr.flushBatchedSubmissions(); bcsCsr.flushTagUpdate(); } + if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) { + commandStreamReceiverOwnership.unlock(); + } bcsCommandStreamReceiverOwnership.unlock(); + queueOwnership.unlock(); if (blocking) { const auto waitStatus = waitForAllEngines(blockQueue, nullptr, false); if (waitStatus == WaitStatus::gpuHang) { diff --git a/opencl/source/helpers/enqueue_properties.h b/opencl/source/helpers/enqueue_properties.h index b9c4468f4c..ab55a03ea3 100644 --- a/opencl/source/helpers/enqueue_properties.h +++ b/opencl/source/helpers/enqueue_properties.h @@ -41,13 +41,13 @@ struct EnqueueProperties { return; } - if (flushDependenciesOnly) { - operation = Operation::dependencyResolveOnGpu; + if (isFlushWithEvent) { + operation = Operation::profilingOnly; return; } - if (isFlushWithEvent) { - operation = Operation::profilingOnly; + if (flushDependenciesOnly) { + operation = Operation::dependencyResolveOnGpu; return; } diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp index 070ff06d60..20ff4e38da 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_2_tests.cpp @@ -37,7 +37,7 @@ using MultiIoqCmdQSynchronizationTest = CommandQueueHwBlitTest; HWTEST_F(MultiIoqCmdQSynchronizationTest, givenTwoIoqCmdQsWhenEnqueuesSynchronizedWithMarkersThenCorrectSynchronizationIsApplied) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; if 
(pCmdQ->getTimestampPacketContainer() == nullptr) { GTEST_SKIP(); @@ -73,7 +73,11 @@ HWTEST_F(MultiIoqCmdQSynchronizationTest, givenTwoIoqCmdQsWhenEnqueuesSynchroniz LinearStream &bcsStream = pCmdQ2->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->getCS(0); HardwareParse bcsHwParser; bcsHwParser.parseCommands(bcsStream, bcsStart); - auto semaphoreCmdBcs = genCmdCast(*bcsHwParser.cmdList.begin()); + auto semaphoreBcsItor = find(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end()); + if (pClDevice->getProductHelper().isDcFlushAllowed()) { + ++semaphoreBcsItor; + } + auto semaphoreCmdBcs = genCmdCast(*semaphoreBcsItor); EXPECT_NE(nullptr, semaphoreCmdBcs); EXPECT_EQ(1u, semaphoreCmdBcs->getSemaphoreDataDword()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmdBcs->getCompareOperation()); @@ -91,18 +95,15 @@ HWTEST_F(MultiIoqCmdQSynchronizationTest, givenTwoIoqCmdQsWhenEnqueuesSynchroniz EXPECT_EQ(nodeGpuAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation()); - bool pipeControlForBcsSemaphoreFound = false; - auto pipeControlsAfterSemaphore = findAll(semaphoreCcsItor, ccsHwParser.cmdList.end()); - for (auto pipeControlIter : pipeControlsAfterSemaphore) { - auto pipeControlCmd = genCmdCast(*pipeControlIter); - if (0u == pipeControlCmd->getImmediateData() && - PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA == pipeControlCmd->getPostSyncOperation() && - NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd) == bcsSemaphoreAddress) { - pipeControlForBcsSemaphoreFound = true; - break; + bool storeRegmemForBcsSemaphoreFound = false; + auto storeRegMems = findAll(semaphoreCcsItor, ccsHwParser.cmdList.end()); + for (auto storeRegMemIter : storeRegMems) { + auto storeRegMemCmd = genCmdCast(*storeRegMemIter); + if (bcsSemaphoreAddress == storeRegMemCmd->getMemoryAddress()) { + 
storeRegmemForBcsSemaphoreFound = true; } } - EXPECT_TRUE(pipeControlForBcsSemaphoreFound); + EXPECT_TRUE(storeRegmemForBcsSemaphoreFound); } EXPECT_EQ(CL_SUCCESS, pCmdQ->finish(false));