performance: Allow optimized counter-based (CB) events on mutable command lists (MCL)

Resolves: NEO-14195

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-06-13 13:59:13 +00:00
committed by Compute-Runtime-Automation
parent 4729b82805
commit 747a708213
3 changed files with 21 additions and 15 deletions

View File

@@ -2976,7 +2976,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
isQwordInOrderCounter(), copyOnlyWait);
} else {
auto resolveDependenciesViaPipeControls = !copyOnlyWait && !this->asMutable() && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
auto resolveDependenciesViaPipeControls = !copyOnlyWait && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) {
resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get();
@@ -3134,7 +3134,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
continue;
}
if (event->isCounterBased() && (this->heaplessModeEnabled || !event->hasInOrderTimestampNode() || this->asMutable())) {
if (event->isCounterBased() && (this->heaplessModeEnabled || !event->hasInOrderTimestampNode())) {
// 1. Regular CmdList adds submission counter to base value on each Execute
// 2. Immediate CmdList takes current value (with submission counter)
auto waitValue = !isImmediateType() ? event->getInOrderExecBaseSignalValue() : event->getInOrderExecSignalValueWithSubmissionCounter();

View File

@@ -313,7 +313,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
std::list<void *> additionalCommands;
if (compactEvent && (!compactEvent->isCounterBased() || this->asMutable())) {
if (compactEvent && !compactEvent->isCounterBased()) {
appendEventForProfilingAllWalkers(compactEvent, nullptr, launchParams.outListCommands, true, true, launchParams.omitAddingEventResidency, false);
}
@@ -331,12 +331,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(eventForInOrderExec);
if (inOrderExecSignalRequired) {
if (!compactEvent || this->asMutable() || !compactEvent->isCounterBased() || compactEvent->isUsingContextEndOffset()) {
if (!compactEvent || !compactEvent->isCounterBased() || compactEvent->isUsingContextEndOffset()) {
if (inOrderNonWalkerSignalling) {
if (!eventForInOrderExec->getAllocation(this->device)) {
eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag(), this->partitionCount);
}
if (this->asMutable() || !eventForInOrderExec->isCounterBased()) {
if (!eventForInOrderExec->isCounterBased()) {
dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false);
} else if (compactEvent) {
eventAddress = eventForInOrderExec->getPacketAddress(this->device);
@@ -464,7 +464,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
if (!launchParams.makeKernelCommandView) {
if (compactEvent && (!compactEvent->isCounterBased() || this->asMutable())) {
if (compactEvent && !compactEvent->isCounterBased()) {
void **syncCmdBuffer = nullptr;
if (launchParams.outSyncCommand != nullptr) {
launchParams.outSyncCommand->type = CommandToPatch::SignalEventPostSyncPipeControl;
@@ -489,8 +489,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (inOrderExecSignalRequired) {
if (inOrderNonWalkerSignalling) {
if (!launchParams.skipInOrderNonWalkerSignaling) {
if (!(eventForInOrderExec->isCounterBased() && eventForInOrderExec->isUsingContextEndOffset()) || this->asMutable()) {
if (compactEvent && (compactEvent->isCounterBased() && !this->asMutable())) {
if (!(eventForInOrderExec->isCounterBased() && eventForInOrderExec->isUsingContextEndOffset())) {
if (compactEvent && compactEvent->isCounterBased()) {
auto pcCmdPtr = this->commandContainer.getCommandStream()->getSpace(0u);
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, true, textureFlushRequired);

View File

@@ -300,14 +300,20 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue)) {
bool signaled = true;
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>(), 0)) {
signaled = false;
break;
}
hostAddress = ptrOffset(hostAddress, device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset());
if (this->isCounterBased() && !this->inOrderTimestampNode.empty() && !this->device->getCompilerProductHelper().isHeaplessModeEnabled(this->device->getHwInfo())) {
this->synchronizeTimestampCompletionWithTimeout();
signaled = this->isTimestampPopulated();
} else {
const uint64_t *hostAddress = ptrOffset(inOrderExecInfo->getBaseHostAddress(), this->inOrderAllocationOffset);
for (uint32_t i = 0; i < inOrderExecInfo->getNumHostPartitionsToWait(); i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>(), 0)) {
signaled = false;
break;
}
hostAddress = ptrOffset(hostAddress, device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset());
}
}
if (!signaled) {