mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
feature: control post sync completion check
Related-To: NEO-14844 Signed-off-by: Tomasz Biernacik <tomasz.biernacik@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
087d1ecea4
commit
2c5cbec033
@@ -137,6 +137,7 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
this->heaplessStateInitEnabled = compilerProductHelper.isHeaplessStateInitEnabled(this->heaplessModeEnabled);
|
||||
this->isForceStateless = compilerProductHelper.isForceToStatelessRequired();
|
||||
this->l3FlushAfterPostSyncEnabled = productHelper.isL3FlushAfterPostSyncRequired(this->heaplessModeEnabled);
|
||||
this->shouldRegisterEnqueuedWalkerWithProfiling = productHelper.shouldRegisterEnqueuedWalkerWithProfiling();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -415,6 +415,20 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
return this->isCacheFlushOnNextBcsWriteRequired && this->isImageWriteOperation(cmdType);
|
||||
}
|
||||
|
||||
bool getShouldRegisterEnqueuedWalkerWithProfiling() {
|
||||
return this->shouldRegisterEnqueuedWalkerWithProfiling;
|
||||
}
|
||||
|
||||
void registerWalkerWithProfilingEnqueued() {
|
||||
this->isWalkerWithProfilingEnqueued = true;
|
||||
}
|
||||
|
||||
bool getAndClearIsWalkerWithProfilingEnqueued() {
|
||||
bool retVal = this->isWalkerWithProfilingEnqueued;
|
||||
this->isWalkerWithProfilingEnqueued = false;
|
||||
return retVal;
|
||||
}
|
||||
|
||||
protected:
|
||||
void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
|
||||
cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
|
||||
@@ -529,6 +543,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
||||
bool isForceStateless = false;
|
||||
bool l3FlushedAfterCpuRead = true;
|
||||
bool l3FlushAfterPostSyncEnabled = false;
|
||||
bool isWalkerWithProfilingEnqueued = false;
|
||||
bool shouldRegisterEnqueuedWalkerWithProfiling = false;
|
||||
};
|
||||
|
||||
static_assert(NEO::NonCopyableAndNonMovable<CommandQueue>);
|
||||
|
||||
@@ -918,6 +918,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
|
||||
dispatchFlags.isWalkerWithProfilingEnqueued = getAndClearIsWalkerWithProfilingEnqueued();
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
||||
uint32_t lws[3] = {static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().x), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().y), static_cast<uint32_t>(multiDispatchInfo.begin()->getLocalWorkgroupSize().z)};
|
||||
@@ -1178,6 +1179,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
isDcFlushRequiredOnStallingCommandsOnNextFlush() // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
|
||||
dispatchFlags.isWalkerWithProfilingEnqueued = getAndClearIsWalkerWithProfilingEnqueued();
|
||||
|
||||
const bool isHandlingBarrier = isStallingCommandsOnNextFlushRequired();
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
|
||||
@@ -155,12 +155,17 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, walkerArgs.timestampPacketDependencies, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
}
|
||||
|
||||
if (commandQueue.getShouldRegisterEnqueuedWalkerWithProfiling() && commandQueue.isProfilingEnabled() && walkerArgs.event) {
|
||||
commandQueue.registerWalkerWithProfilingEnqueued();
|
||||
}
|
||||
|
||||
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
|
||||
uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();
|
||||
uint32_t registerData = debugManager.flags.GpuScratchRegWriteRegisterData.get();
|
||||
|
||||
PipeControlArgs args;
|
||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, commandQueue.getDevice().getRootDeviceEnvironment());
|
||||
args.isWalkerWithProfilingEnqueued = commandQueue.getAndClearIsWalkerWithProfilingEnqueued();
|
||||
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
*commandStream,
|
||||
PostSyncMode::noWrite,
|
||||
|
||||
Reference in New Issue
Block a user