performance: don't flush gpgpu if not required

Related-To: NEO-12124

If the queue is an out-of-order queue (OOQ) and there are no cross-engine
dependencies, don't flush CCS before submitting a copy on BCS.

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2024-07-25 15:45:32 +00:00
committed by Compute-Runtime-Automation
parent 05b8c2ed97
commit ace883ca55
8 changed files with 31 additions and 6 deletions

View File

@@ -521,7 +521,7 @@ class CommandQueueHw : public CommandQueue {
TimestampPacketDependencies &timestampPacketDependencies,
bool relaxedOrderingEnabled);
MOCKABLE_VIRTUAL bool isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies) const;
MOCKABLE_VIRTUAL bool isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies, bool containsCrossEngineDependency) const;
void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType);
bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);

View File

@@ -205,10 +205,14 @@ void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDis
}
template <typename Family>
bool CommandQueueHw<Family>::isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies) const {
bool CommandQueueHw<Family>::isGpgpuSubmissionForBcsRequired(bool queueBlocked, TimestampPacketDependencies &timestampPacketDependencies, bool containsCrossEngineDependency) const {
if (queueBlocked || timestampPacketDependencies.barrierNodes.peekNodes().size() > 0u) {
return true;
}
if (isOOQEnabled()) {
return containsCrossEngineDependency;
}
bool required = false;
switch (latestSentEnqueueType) {
case NEO::EnqueueProperties::Operation::explicitCacheFlush:

View File

@@ -1460,7 +1460,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlit(const MultiDispatchInfo &multiDisp
migratedMemory = migrateMultiGraphicsAllocationsIfRequired(multiDispatchInfo.peekBuiltinOpParams(), bcsCsr);
}
auto gpgpuSubmission = isGpgpuSubmissionForBcsRequired(blockQueue, timestampPacketDependencies);
auto gpgpuSubmission = isGpgpuSubmissionForBcsRequired(blockQueue, timestampPacketDependencies, csrDeps.containsCrossEngineDependency);
if ((isCacheFlushForBcsRequired() || NEO::EnqueueProperties::Operation::dependencyResolveOnGpu == latestSentEnqueueType) && gpgpuSubmission) {
timestampPacketDependencies.cacheFlushNodes.add(allocator->getTag());
}

View File

@@ -67,6 +67,7 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
}
csrDeps.csrWithMultiEngineDependencies.insert(dependentCsr);
}
csrDeps.containsCrossEngineDependency = true;
}
}
}