feature: control post sync completion check

Related-To: NEO-14844

Signed-off-by: Tomasz Biernacik <tomasz.biernacik@intel.com>
This commit is contained in:
Tomasz Biernacik
2025-07-10 10:59:42 +00:00
committed by Compute-Runtime-Automation
parent 087d1ecea4
commit 2c5cbec033
24 changed files with 447 additions and 32 deletions

View File

@@ -137,6 +137,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(this->heaplessModeEnabled);
this->isForceStateless = compilerProductHelper.isForceToStatelessRequired();
this->l3FlushAfterPostSyncEnabled = productHelper.isL3FlushAfterPostSyncRequired(this->heaplessModeEnabled);
this->shouldRegisterEnqueuedWalkerWithProfiling = productHelper.shouldRegisterEnqueuedWalkerWithProfiling();
}
}

View File

@@ -415,6 +415,20 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
return this->isCacheFlushOnNextBcsWriteRequired && this->isImageWriteOperation(cmdType);
}
// Returns the current value of the shouldRegisterEnqueuedWalkerWithProfiling member.
// Does not modify any queue state.
bool getShouldRegisterEnqueuedWalkerWithProfiling() {
return this->shouldRegisterEnqueuedWalkerWithProfiling;
}
// Sets the isWalkerWithProfilingEnqueued flag to true. The flag stays set until
// getAndClearIsWalkerWithProfilingEnqueued() is called.
void registerWalkerWithProfilingEnqueued() {
this->isWalkerWithProfilingEnqueued = true;
}
// Read-and-reset accessor: reports whether the isWalkerWithProfilingEnqueued flag
// was set, and leaves the flag cleared (false) afterwards. A second consecutive
// call therefore returns false unless the flag has been set again in between.
bool getAndClearIsWalkerWithProfilingEnqueued() {
    // Snapshot the flag before resetting it so the caller sees the pre-reset state.
    const bool wasEnqueued = this->isWalkerWithProfilingEnqueued;
    this->isWalkerWithProfilingEnqueued = false;
    return wasEnqueued;
}
protected:
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
@@ -529,6 +543,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
bool isForceStateless = false;
bool l3FlushedAfterCpuRead = true;
bool l3FlushAfterPostSyncEnabled = false;
// Set to true by registerWalkerWithProfilingEnqueued(); read and reset to false by
// getAndClearIsWalkerWithProfilingEnqueued().
bool isWalkerWithProfilingEnqueued = false;
// Value reported by getShouldRegisterEnqueuedWalkerWithProfiling(); false unless assigned elsewhere.
bool shouldRegisterEnqueuedWalkerWithProfiling = false;
};
static_assert(NEO::NonCopyableAndNonMovable<CommandQueue>);

View File

@@ -918,6 +918,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
);
dispatchFlags.isWalkerWithProfilingEnqueued = getAndClearIsWalkerWithProfilingEnqueued();
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
uint32_t lws[3] = {static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().x), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().y), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().z)};
@@ -1178,6 +1179,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
);
dispatchFlags.isWalkerWithProfilingEnqueued = getAndClearIsWalkerWithProfilingEnqueued();
const bool isHandlingBarrier = isStallingCommandsOnNextFlushRequired();
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {

View File

@@ -155,12 +155,17 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getRootDeviceEnvironment());
}
if (commandQueue.getShouldRegisterEnqueuedWalkerWithProfiling() && commandQueue.isProfilingEnabled() && walkerArgs.event) {
commandQueue.registerWalkerWithProfilingEnqueued();
}
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();
uint32_t registerData = debugManager.flags.GpuScratchRegWriteRegisterData.get();
PipeControlArgs args;
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, commandQueue.getDevice().getRootDeviceEnvironment());
args.isWalkerWithProfilingEnqueued = commandQueue.getAndClearIsWalkerWithProfilingEnqueued();
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandStream,
PostSyncMode::noWrite,