fix: Few enqueue handler fixes

-do not wait for event TS under cmdQ's lock
-determine latest enqueue operation in correct order
-do not recognize marker as a barrier in some cases
-fix mutex order in enqueue blit

Related-To: HSD-16027856705

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-11-24 10:18:22 +00:00
committed by Compute-Runtime-Automation
parent 56b30d1803
commit 9228fa1251
4 changed files with 24 additions and 22 deletions

View File

@@ -189,7 +189,8 @@ bool CommandQueueHw<Family>::waitForTimestamps(std::span<CopyEngineState> copyEn
if (isWaitForTimestampsEnabled()) {
{
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this);
// mainContainer == this->timestampPacketContainer.get() means wait is called from command queue on its TS. Lock is needed, because another enqueue might generate TS and modify the container.
TakeOwnershipWrapper<CommandQueue> queueOwnership(*this, mainContainer == this->timestampPacketContainer.get());
waited = waitForTimestampsWithinContainer<TSPacketType>(mainContainer, getGpgpuCommandStreamReceiver(), status);
}

View File

@@ -198,7 +198,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
const auto &hwInfo = this->getDevice().getHardwareInfo();
auto &productHelper = getDevice().getProductHelper();
bool canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = false;
bool isNonStallingIoqBarrier = isFlushForProfilingRequired(commandType) && !isOOQEnabled() && (debugManager.flags.OptimizeIoqBarriersHandling.get() != 0);
bool isNonStallingIoqBarrier = commandType == CL_COMMAND_BARRIER && !isOOQEnabled() && (debugManager.flags.OptimizeIoqBarriersHandling.get() != 0);
const bool isNonStallingIoqBarrierWithDependencies = isNonStallingIoqBarrier && (eventsRequest.numEventsInWaitList > 0);
if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
@@ -1550,17 +1550,17 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
if (deferredMultiRootSyncNodes.get()) {
csrDeps.copyRootDeviceSyncNodesToNewContainer(*deferredMultiRootSyncNodes);
}
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
commandStreamReceiverOwnership.unlock();
}
queueOwnership.unlock();
if (migratedMemory) {
bcsCsr.flushBatchedSubmissions();
bcsCsr.flushTagUpdate();
}
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
commandStreamReceiverOwnership.unlock();
}
bcsCommandStreamReceiverOwnership.unlock();
queueOwnership.unlock();
if (blocking) {
const auto waitStatus = waitForAllEngines(blockQueue, nullptr, false);
if (waitStatus == WaitStatus::gpuHang) {