fix: ensure profiling enabled for OCL Blit Split path

Related-To: NEO-13842

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-01-23 12:32:51 +00:00
committed by Compute-Runtime-Automation
parent ae772c849d
commit c75bcba1e6
4 changed files with 91 additions and 14 deletions

View File

@@ -457,12 +457,12 @@ class CommandQueueHw : public CommandQueue {
LinearStream *commandStream,
EventsRequest &eventsRequest,
CsrDependencies &csrDeps);
BlitProperties processDispatchForBlitEnqueue(CommandStreamReceiver &blitCommandStreamReceiver,
const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest,
LinearStream *commandStream,
uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync);
MOCKABLE_VIRTUAL BlitProperties processDispatchForBlitEnqueue(CommandStreamReceiver &blitCommandStreamReceiver,
const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest,
LinearStream *commandStream,
uint32_t commandType, bool queueBlocked, bool profilingEnabled, TagNodeBase *multiRootDeviceEventSync);
void submitCacheFlush(Surface **surfaces,
size_t numSurfaces,
LinearStream *commandStream,

View File

@@ -575,7 +575,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
const MultiDispatchInfo &multiDispatchInfo,
TimestampPacketDependencies &timestampPacketDependencies,
const EventsRequest &eventsRequest, LinearStream *commandStream,
uint32_t commandType, bool queueBlocked, TagNodeBase *multiRootDeviceEventSync) {
uint32_t commandType, bool queueBlocked, bool profilingEnabled, TagNodeBase *multiRootDeviceEventSync) {
auto blitDirection = ClBlitProperties::obtainBlitDirection(commandType);
auto blitProperties = ClBlitProperties::constructProperties(blitDirection, blitCommandStreamReceiver,
@@ -592,7 +592,7 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(CommandS
blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync;
auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0);
blitProperties.blitSyncProperties.outputTimestampPacket = currentTimestampPacketNode;
blitProperties.blitSyncProperties.syncMode = (eventsRequest.outEvent && isProfilingEnabled()) ? BlitSyncMode::timestamp : BlitSyncMode::immediate;
blitProperties.blitSyncProperties.syncMode = profilingEnabled ? BlitSyncMode::timestamp : BlitSyncMode::immediate;
if (commandStream) {
if (timestampPacketDependencies.cacheFlushNodes.peekNodes().size() > 0) {
@@ -1430,6 +1430,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
}
eventsRequest.setupBcsCsrForOutputEvent(bcsCsr);
const bool profilingEnabled = isProfilingEnabled() && pEventBuilder->getEvent();
std::unique_ptr<KernelOperation> blockedCommandsData;
TakeOwnershipWrapper<CommandQueueHw<GfxFamily>> queueOwnership(*this);
if (debugManager.flags.ForceCsrLockInBcsEnqueueOnlyForGpgpuSubmission.get() != 1) {
@@ -1513,7 +1515,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
}
blitPropertiesContainer.push_back(processDispatchForBlitEnqueue(bcsCsr, multiDispatchInfo, timestampPacketDependencies,
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, multiRootEventSyncStamp));
eventsRequest, gpgpuCommandStream, cmdType, blockQueue, profilingEnabled, multiRootEventSyncStamp));
if (!blockQueue) {
completionStamp = enqueueCommandWithoutKernel(nullptr, 0, gpgpuCommandStream, gpgpuCommandStreamStart, blocking,