feature: use the user's aggregated signal event in the BCS split path when its increment value divides evenly by the engine count

Related-To: NEO-14557

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-09-23 16:35:04 +00:00
committed by Compute-Runtime-Automation
parent 8484c07e9b
commit 94d01b4d40
14 changed files with 265 additions and 168 deletions

View File

@@ -216,7 +216,7 @@ struct CommandListCoreFamily : public CommandListImp {
void appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency,
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool dualStreamCopyOffloadOperation);
MOCKABLE_VIRTUAL void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired);
MOCKABLE_VIRTUAL void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired, bool skipAggregatedEventSignaling);
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining, bool copyOffloadOperation);
void handleInOrderCounterOverflow(bool copyOffloadOperation);
@@ -251,6 +251,7 @@ struct CommandListCoreFamily : public CommandListImp {
void assignInOrderExecInfoToEvent(Event *event);
bool hasInOrderDependencies() const;
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
// Read-only accessor for the useAdditionalBlitProperties flag, so code outside this class (e.g. the BCS split path) can query it.
bool isUsingAdditionalBlitProperties() const { return useAdditionalBlitProperties; }
protected:
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
@@ -372,7 +373,7 @@ struct CommandListCoreFamily : public CommandListImp {
uint32_t getRegionOffsetForAppendMemoryCopyBlitRegion(AlignedAllocationData *allocationData);
void handlePostSubmissionState();
MOCKABLE_VIRTUAL void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint32_t forceAggregatedEventIncValue, bool useAdditionalTimestamp);
MOCKABLE_VIRTUAL void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint64_t forceAggregatedEventIncValue, bool useAdditionalTimestamp);
void setupFillKernelArguments(size_t baseOffset,
size_t patternSize,

View File

@@ -228,7 +228,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderCounterOverflow(bool cop
inOrderExecInfo->setAllocationOffset(newOffset);
inOrderExecInfo->initializeAllocationsFromHost();
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false); // signal counter on new offset
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false, false); // signal counter on new offset
}
}
@@ -690,7 +690,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(event, false, false, false);
appendSignalInOrderDependencyCounter(event, false, false, false, false);
}
handleInOrderDependencyCounter(event, false, false);
event->unsetInOrderExecInfo();
@@ -736,7 +736,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
addToMappedEventList(signalEvent);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -1728,7 +1728,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(uintptr_t
if (!useAdditionalBlitProperties) {
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
}
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -2038,7 +2038,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
if ((!useAdditionalBlitProperties || !isCopyOnlyEnabled) &&
(launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl)) {
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnlyEnabled);
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false, false);
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
appendSignalAggregatedEventAtomic(*signalEvent);
}
@@ -2175,7 +2175,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
if (this->isInOrderExecutionEnabled()) {
if (inOrderCopyOnlySignalingAllowed) {
if (!useAdditionalBlitProperties) {
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
@@ -2722,7 +2722,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
if (this->isInOrderExecutionEnabled()) {
if (launchParams.isKernelSplitOperation || launchParams.pipeControlSignalling) {
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnly(false));
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
} else {
nonWalkerInOrderCmdChaining = isInOrderNonWalkerSignalingRequired(signalEvent);
}
@@ -2840,7 +2840,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, cons
}
if (isInOrderExecutionEnabled() && isCopyOnlySignaling) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, memoryCopyParams.copyOffloadAllowed);
}
@@ -3094,7 +3094,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
}
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(event, false, false, false);
appendSignalInOrderDependencyCounter(event, false, false, false, false);
}
handleInOrderDependencyCounter(event, false, false);
@@ -3331,7 +3331,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (apiRequest) {
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(nullptr, false, false, false);
appendSignalInOrderDependencyCounter(nullptr, false, false, false, false);
}
handleInOrderDependencyCounter(nullptr, false, false);
}
@@ -3380,7 +3380,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalAggregatedEventAtomic(Eve
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired) {
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired, bool skipAggregatedEventSignaling) {
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;
@@ -3431,7 +3431,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue, copyOffloadOperation);
}
if (Event::isAggregatedEvent(signalEvent)) {
if (!skipAggregatedEventSignaling && Event::isAggregatedEvent(signalEvent)) {
appendSignalAggregatedEventAtomic(*signalEvent);
}
@@ -3639,7 +3639,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -4268,7 +4268,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, skipPipeControl, false, isCopyOnly(false));
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -4421,7 +4421,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
}
handleInOrderDependencyCounter(signalEvent, false, false);
@@ -4482,7 +4482,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
}
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(nullptr, false, false, false);
appendSignalInOrderDependencyCounter(nullptr, false, false, false, false);
}
handleInOrderDependencyCounter(nullptr, false, false);

View File

@@ -16,7 +16,7 @@ constexpr bool CommandListCoreFamily<gfxCoreFamily>::checkIfAllocationImportedRe
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint32_t forceAggregatedEventIncValue, bool useAdditionalTimestamp) {
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint64_t forceAggregatedEventIncValue, bool useAdditionalTimestamp) {
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -596,7 +596,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
}
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, false, CommandToPatch::Invalid);
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false, false, false);
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false, false, false, false);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -696,8 +696,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
if (isSplitNeeded) {
setupFlagsForBcsSplit(memoryCopyParams, hasStallingCmds, copyOffloadFlush, srcptr, dstptr, size, size);
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint32_t aggregatedEventInvValue) {
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventInvValue;
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint64_t aggregatedEventIncValue) {
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventIncValue;
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, memoryCopyParams);
};
@@ -752,7 +752,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch),
this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch));
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint32_t aggregatedEventInvValue) {
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint64_t aggregatedEventIncValue) {
ze_copy_region_t dstRegionLocal = {};
ze_copy_region_t srcRegionLocal = {};
memcpy(&dstRegionLocal, dstRegion, sizeof(ze_copy_region_t));
@@ -761,7 +761,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
dstRegionLocal.width = static_cast<uint32_t>(sizeParam);
srcRegionLocal.originX = srcOriginXParam;
srcRegionLocal.width = static_cast<uint32_t>(sizeParam);
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventInvValue;
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventIncValue;
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, &dstRegionLocal, dstPitch, dstSlicePitch,
srcPtr, &srcRegionLocal, srcPitch, srcSlicePitch,
hSignalEventParam, 0u, nullptr, memoryCopyParams);
@@ -838,8 +838,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
setupFlagsForBcsSplit(bcsSplitMemoryCopyParams, hasStallingCmds, copyOffloadFlush, srcAddress, dstAddress, size, size);
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstAddressParam, const void *srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint32_t aggregatedEventInvValue) {
bcsSplitMemoryCopyParams.forceAggregatedEventIncValue = aggregatedEventInvValue;
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstAddressParam, const void *srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint64_t aggregatedEventIncValue) {
bcsSplitMemoryCopyParams.forceAggregatedEventIncValue = aggregatedEventIncValue;
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstAddressParam, srcAddressParam, sizeParam, hSignalEventParam, 0u, nullptr, bcsSplitMemoryCopyParams);
};
@@ -1377,7 +1377,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) {
ze_result_t ret = ZE_RESULT_SUCCESS;
if (waitOnInOrderCounterRequired && !this->isHeaplessModeEnabled() && this->latestOperationHasOptimizedCbEvent) {
this->appendSignalInOrderDependencyCounter(nullptr, false, true, false);
this->appendSignalInOrderDependencyCounter(nullptr, false, true, false, false);
this->inOrderExecInfo->addCounterValue(this->getInOrderIncrementValue());
this->handleInOrderCounterOverflow(false);
ret = flushImmediate(ret, false, true, false, NEO::AppendOperations::nonKernel, false, nullptr, false, nullptr, nullptr);
@@ -1865,6 +1865,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(signalEvent,
copyOffloadOperation,
false,
false,
false);
}

View File

@@ -507,12 +507,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (compactEvent && compactEvent->isCounterBased()) {
auto pcCmdPtr = this->commandContainer.getCommandStream()->getSpace(0u);
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, true, textureFlushRequired);
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, true, textureFlushRequired, false);
addCmdForPatching(nullptr, pcCmdPtr, nullptr, inOrderCounterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::pipeControl);
textureFlushRequired = false;
} else {
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, false, false);
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, false, false, false);
}
} else {
this->latestOperationHasOptimizedCbEvent = true;

View File

@@ -14,11 +14,11 @@
namespace L0 {
struct CmdListMemoryCopyParams {
uint64_t forceAggregatedEventIncValue = 0;
const void *bcsSplitBaseSrcPtr = nullptr;
void *bcsSplitBaseDstPtr = nullptr;
size_t bcsSplitTotalSrcSize = 0;
size_t bcsSplitTotalDstSize = 0;
uint32_t forceAggregatedEventIncValue = 0;
bool relaxedOrderingDispatch = false;
bool forceDisableCopyOnlyInOrderSignaling = false;
bool copyOffloadAllowed = false;

View File

@@ -30,7 +30,7 @@ struct DeviceImp;
struct BcsSplit {
template <GFXCORE_FAMILY gfxCoreFamily, typename T, typename K>
using AppendCallFuncT = std::function<ze_result_t(CommandListCoreFamilyImmediate<gfxCoreFamily> *, T, K, size_t, ze_event_handle_t, uint32_t)>;
using AppendCallFuncT = std::function<ze_result_t(CommandListCoreFamilyImmediate<gfxCoreFamily> *, T, K, size_t, ze_event_handle_t, uint64_t)>;
using CsrContainer = StackVec<NEO::CommandStreamReceiver *, 12u>;
DeviceImp &device;
@@ -81,15 +81,30 @@ struct BcsSplit {
NEO::TransferDirection direction,
size_t estimatedCmdBufferSize,
AppendCallFuncT<gfxCoreFamily, T, K> appendCall) {
constexpr size_t maxEventCountInPool = MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType);
const auto aggregatedEventsMode = this->events.aggregatedEventsMode;
auto signalEvent = Event::fromHandle(hSignalEvent);
ze_result_t result = ZE_RESULT_SUCCESS;
auto &cmdListsForSplit = this->getCmdListsForSplit(direction);
auto engineCount = cmdListsForSplit.size();
size_t markerEventIndex = 0;
uint64_t aggregatedEventIncrementVal = 1;
auto markerEventIndexRet = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
if (!markerEventIndexRet.has_value()) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
const bool useSignalEventForSubcopy = aggregatedEventsMode && cmdList->isUsingAdditionalBlitProperties() && Event::isAggregatedEvent(signalEvent) &&
(signalEvent->getInOrderIncrementValue() % engineCount == 0);
if (useSignalEventForSubcopy) {
aggregatedEventIncrementVal = signalEvent->getInOrderIncrementValue() / engineCount;
} else {
auto markerEventIndexRet = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), maxEventCountInPool);
if (!markerEventIndexRet.has_value()) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
markerEventIndex = *markerEventIndexRet;
}
auto markerEventIndex = *markerEventIndexRet;
auto barrierRequired = !cmdList->isInOrderExecutionEnabled() && cmdList->isBarrierRequired();
if (barrierRequired) {
cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle(), false);
@@ -98,18 +113,11 @@ struct BcsSplit {
auto subcopyEventIndex = markerEventIndex * this->cmdLists.size();
StackVec<ze_event_handle_t, 16> eventHandles;
auto &cmdListsForSplit = this->getCmdListsForSplit(direction);
auto signalEvent = Event::fromHandle(hSignalEvent);
if (!cmdList->handleCounterBasedEventOperations(signalEvent, false)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
const auto aggregatedEventsMode = this->events.aggregatedEventsMode;
auto totalSize = size;
auto engineCount = cmdListsForSplit.size();
for (size_t i = 0; i < cmdListsForSplit.size(); i++) {
auto subCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListsForSplit[i]);
@@ -126,7 +134,7 @@ struct BcsSplit {
}
subCmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, false, false, false);
if (signalEvent && i == 0u) {
if (!useSignalEventForSubcopy && signalEvent && i == 0u) {
subCmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false, true);
}
@@ -135,8 +143,8 @@ struct BcsSplit {
auto localSrcPtr = ptrOffset(srcptr, size - totalSize);
auto copyEventIndex = aggregatedEventsMode ? markerEventIndex : subcopyEventIndex + i;
auto eventHandle = this->events.subcopy[copyEventIndex]->toHandle();
result = appendCall(subCmdList, localDstPtr, localSrcPtr, localSize, eventHandle, 1);
auto eventHandle = useSignalEventForSubcopy ? signalEvent : this->events.subcopy[copyEventIndex]->toHandle();
result = appendCall(subCmdList, localDstPtr, localSrcPtr, localSize, eventHandle, aggregatedEventIncrementVal);
subCmdList->flushImmediate(result, true, !hasRelaxedOrderingDependencies, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, false, nullptr, true, nullptr, nullptr);
if ((aggregatedEventsMode && i == 0) || !aggregatedEventsMode) {
@@ -157,7 +165,7 @@ struct BcsSplit {
const auto isCopyCmdList = cmdList->isCopyOnly(dualStreamCopyOffload);
if (signalEvent) {
if (!useSignalEventForSubcopy && signalEvent) {
cmdList->appendSignalEventPostWalker(signalEvent, nullptr, nullptr, !isCopyCmdList, false, isCopyCmdList);
}
@@ -166,11 +174,11 @@ struct BcsSplit {
}
if (cmdList->isInOrderExecutionEnabled()) {
cmdList->appendSignalInOrderDependencyCounter(signalEvent, dualStreamCopyOffload, false, false);
cmdList->appendSignalInOrderDependencyCounter(signalEvent, dualStreamCopyOffload, false, false, useSignalEventForSubcopy);
}
cmdList->handleInOrderDependencyCounter(signalEvent, false, dualStreamCopyOffload);
if (aggregatedEventsMode) {
if (aggregatedEventsMode && !useSignalEventForSubcopy) {
cmdList->assignInOrderExecInfoToEvent(this->events.marker[markerEventIndex]);
}

View File

@@ -322,7 +322,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter(event, false, false, false);
appendSignalInOrderDependencyCounter(event, false, false, false, false);
}
return ZE_RESULT_SUCCESS;