mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 12:42:54 +08:00
feature: use User event in bcs split path if increment value is the same
Related-To: NEO-14557
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: 8484c07e9b
Commit: 94d01b4d40
@@ -216,7 +216,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void appendWaitOnInOrderDependency(std::shared_ptr<NEO::InOrderExecInfo> &inOrderExecInfo, CommandToPatchContainer *outListCommands,
|
||||
uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency,
|
||||
bool skipAddingWaitEventsToResidency, bool noopDispatch, bool dualStreamCopyOffloadOperation);
|
||||
MOCKABLE_VIRTUAL void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired);
|
||||
MOCKABLE_VIRTUAL void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired, bool skipAggregatedEventSignaling);
|
||||
void handleInOrderDependencyCounter(Event *signalEvent, bool nonWalkerInOrderCmdsChaining, bool copyOffloadOperation);
|
||||
void handleInOrderCounterOverflow(bool copyOffloadOperation);
|
||||
|
||||
@@ -251,6 +251,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void assignInOrderExecInfoToEvent(Event *event);
|
||||
bool hasInOrderDependencies() const;
|
||||
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency, bool copyOperation);
|
||||
bool isUsingAdditionalBlitProperties() const { return useAdditionalBlitProperties; }
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc,
|
||||
@@ -372,7 +373,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
uint32_t getRegionOffsetForAppendMemoryCopyBlitRegion(AlignedAllocationData *allocationData);
|
||||
void handlePostSubmissionState();
|
||||
|
||||
MOCKABLE_VIRTUAL void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint32_t forceAggregatedEventIncValue, bool useAdditionalTimestamp);
|
||||
MOCKABLE_VIRTUAL void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint64_t forceAggregatedEventIncValue, bool useAdditionalTimestamp);
|
||||
|
||||
void setupFillKernelArguments(size_t baseOffset,
|
||||
size_t patternSize,
|
||||
|
||||
@@ -228,7 +228,7 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderCounterOverflow(bool cop
|
||||
inOrderExecInfo->setAllocationOffset(newOffset);
|
||||
inOrderExecInfo->initializeAllocationsFromHost();
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false); // signal counter on new offset
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false, false); // signal counter on new offset
|
||||
}
|
||||
}
|
||||
|
||||
@@ -690,7 +690,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||
}
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(event, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(event, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(event, false, false);
|
||||
event->unsetInOrderExecInfo();
|
||||
@@ -736,7 +736,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
addToMappedEventList(signalEvent);
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, false);
|
||||
|
||||
@@ -1728,7 +1728,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(uintptr_t
|
||||
if (!useAdditionalBlitProperties) {
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
}
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, false);
|
||||
@@ -2038,7 +2038,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
if ((!useAdditionalBlitProperties || !isCopyOnlyEnabled) &&
|
||||
(launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed || emitPipeControl)) {
|
||||
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnlyEnabled);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false, false);
|
||||
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
|
||||
appendSignalAggregatedEventAtomic(*signalEvent);
|
||||
}
|
||||
@@ -2175,7 +2175,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
if (inOrderCopyOnlySignalingAllowed) {
|
||||
if (!useAdditionalBlitProperties) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, memoryCopyParams.copyOffloadAllowed, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, isCopyOnlyEnabled);
|
||||
} else if (!useAdditionalBlitProperties && isCopyOnlyEnabled && Event::isAggregatedEvent(signalEvent)) {
|
||||
@@ -2722,7 +2722,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
if (launchParams.isKernelSplitOperation || launchParams.pipeControlSignalling) {
|
||||
dispatchInOrderPostOperationBarrier(signalEvent, dcFlush, isCopyOnly(false));
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
} else {
|
||||
nonWalkerInOrderCmdChaining = isInOrderNonWalkerSignalingRequired(signalEvent);
|
||||
}
|
||||
@@ -2840,7 +2840,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, cons
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled() && isCopyOnlySignaling) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, memoryCopyParams.copyOffloadAllowed);
|
||||
}
|
||||
@@ -3094,7 +3094,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
}
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(event, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(event, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(event, false, false);
|
||||
|
||||
@@ -3331,7 +3331,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||
|
||||
if (apiRequest) {
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(nullptr, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(nullptr, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(nullptr, false, false);
|
||||
}
|
||||
@@ -3380,7 +3380,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalAggregatedEventAtomic(Eve
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired, bool skipAggregatedEventSignaling) {
|
||||
using ATOMIC_OPCODES = typename GfxFamily::MI_ATOMIC::ATOMIC_OPCODES;
|
||||
using DATA_SIZE = typename GfxFamily::MI_ATOMIC::DATA_SIZE;
|
||||
|
||||
@@ -3431,7 +3431,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
|
||||
appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue, copyOffloadOperation);
|
||||
}
|
||||
|
||||
if (Event::isAggregatedEvent(signalEvent)) {
|
||||
if (!skipAggregatedEventSignaling && Event::isAggregatedEvent(signalEvent)) {
|
||||
appendSignalAggregatedEventAtomic(*signalEvent);
|
||||
}
|
||||
|
||||
@@ -3639,7 +3639,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, false);
|
||||
|
||||
@@ -4268,7 +4268,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, skipPipeControl, false, isCopyOnly(false));
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, false);
|
||||
|
||||
@@ -4421,7 +4421,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, isCopyOnly(false));
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(signalEvent, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(signalEvent, false, false);
|
||||
|
||||
@@ -4482,7 +4482,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
|
||||
}
|
||||
|
||||
if (this->isInOrderExecutionEnabled()) {
|
||||
appendSignalInOrderDependencyCounter(nullptr, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(nullptr, false, false, false, false);
|
||||
}
|
||||
handleInOrderDependencyCounter(nullptr, false, false);
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ constexpr bool CommandListCoreFamily<gfxCoreFamily>::checkIfAllocationImportedRe
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint32_t forceAggregatedEventIncValue, bool useAdditionalTimestamp) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, uint64_t forceAggregatedEventIncValue, bool useAdditionalTimestamp) {
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -596,7 +596,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
|
||||
}
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(event, nullptr, nonWalkerSignalingHasRelaxedOrdering, false, CommandToPatch::Invalid);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false, false, false);
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(event, false, false, false, false);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -696,8 +696,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
if (isSplitNeeded) {
|
||||
setupFlagsForBcsSplit(memoryCopyParams, hasStallingCmds, copyOffloadFlush, srcptr, dstptr, size, size);
|
||||
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint32_t aggregatedEventInvValue) {
|
||||
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventInvValue;
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint64_t aggregatedEventIncValue) {
|
||||
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventIncValue;
|
||||
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, memoryCopyParams);
|
||||
};
|
||||
|
||||
@@ -752,7 +752,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
this->getTotalSizeForCopyRegion(srcRegion, srcPitch, srcSlicePitch),
|
||||
this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch));
|
||||
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint32_t aggregatedEventInvValue) {
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint64_t aggregatedEventIncValue) {
|
||||
ze_copy_region_t dstRegionLocal = {};
|
||||
ze_copy_region_t srcRegionLocal = {};
|
||||
memcpy(&dstRegionLocal, dstRegion, sizeof(ze_copy_region_t));
|
||||
@@ -761,7 +761,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
dstRegionLocal.width = static_cast<uint32_t>(sizeParam);
|
||||
srcRegionLocal.originX = srcOriginXParam;
|
||||
srcRegionLocal.width = static_cast<uint32_t>(sizeParam);
|
||||
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventInvValue;
|
||||
memoryCopyParams.forceAggregatedEventIncValue = aggregatedEventIncValue;
|
||||
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(dstPtr, &dstRegionLocal, dstPitch, dstSlicePitch,
|
||||
srcPtr, &srcRegionLocal, srcPitch, srcSlicePitch,
|
||||
hSignalEventParam, 0u, nullptr, memoryCopyParams);
|
||||
@@ -838,8 +838,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
|
||||
setupFlagsForBcsSplit(bcsSplitMemoryCopyParams, hasStallingCmds, copyOffloadFlush, srcAddress, dstAddress, size, size);
|
||||
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstAddressParam, const void *srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint32_t aggregatedEventInvValue) {
|
||||
bcsSplitMemoryCopyParams.forceAggregatedEventIncValue = aggregatedEventInvValue;
|
||||
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, void *dstAddressParam, const void *srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam, uint64_t aggregatedEventIncValue) {
|
||||
bcsSplitMemoryCopyParams.forceAggregatedEventIncValue = aggregatedEventIncValue;
|
||||
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstAddressParam, srcAddressParam, sizeParam, hSignalEventParam, 0u, nullptr, bcsSplitMemoryCopyParams);
|
||||
};
|
||||
|
||||
@@ -1377,7 +1377,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushInOrderCounterSignal(bool waitOnInOrderCounterRequired) {
|
||||
ze_result_t ret = ZE_RESULT_SUCCESS;
|
||||
if (waitOnInOrderCounterRequired && !this->isHeaplessModeEnabled() && this->latestOperationHasOptimizedCbEvent) {
|
||||
this->appendSignalInOrderDependencyCounter(nullptr, false, true, false);
|
||||
this->appendSignalInOrderDependencyCounter(nullptr, false, true, false, false);
|
||||
this->inOrderExecInfo->addCounterValue(this->getInOrderIncrementValue());
|
||||
this->handleInOrderCounterOverflow(false);
|
||||
ret = flushImmediate(ret, false, true, false, NEO::AppendOperations::nonKernel, false, nullptr, false, nullptr, nullptr);
|
||||
@@ -1865,6 +1865,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(signalEvent,
|
||||
copyOffloadOperation,
|
||||
false,
|
||||
false,
|
||||
false);
|
||||
}
|
||||
|
||||
|
||||
@@ -507,12 +507,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
if (compactEvent && compactEvent->isCounterBased()) {
|
||||
auto pcCmdPtr = this->commandContainer.getCommandStream()->getSpace(0u);
|
||||
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
|
||||
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, true, textureFlushRequired);
|
||||
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, true, textureFlushRequired, false);
|
||||
addCmdForPatching(nullptr, pcCmdPtr, nullptr, inOrderCounterValue, NEO::InOrderPatchCommandHelpers::PatchCmdType::pipeControl);
|
||||
textureFlushRequired = false;
|
||||
} else {
|
||||
appendWaitOnSingleEvent(eventForInOrderExec, launchParams.outListCommands, false, false, CommandToPatch::CbEventTimestampPostSyncSemaphoreWait);
|
||||
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(eventForInOrderExec, false, false, false, false);
|
||||
}
|
||||
} else {
|
||||
this->latestOperationHasOptimizedCbEvent = true;
|
||||
|
||||
@@ -14,11 +14,11 @@
|
||||
|
||||
namespace L0 {
|
||||
struct CmdListMemoryCopyParams {
|
||||
uint64_t forceAggregatedEventIncValue = 0;
|
||||
const void *bcsSplitBaseSrcPtr = nullptr;
|
||||
void *bcsSplitBaseDstPtr = nullptr;
|
||||
size_t bcsSplitTotalSrcSize = 0;
|
||||
size_t bcsSplitTotalDstSize = 0;
|
||||
uint32_t forceAggregatedEventIncValue = 0;
|
||||
bool relaxedOrderingDispatch = false;
|
||||
bool forceDisableCopyOnlyInOrderSignaling = false;
|
||||
bool copyOffloadAllowed = false;
|
||||
|
||||
@@ -30,7 +30,7 @@ struct DeviceImp;
|
||||
|
||||
struct BcsSplit {
|
||||
template <GFXCORE_FAMILY gfxCoreFamily, typename T, typename K>
|
||||
using AppendCallFuncT = std::function<ze_result_t(CommandListCoreFamilyImmediate<gfxCoreFamily> *, T, K, size_t, ze_event_handle_t, uint32_t)>;
|
||||
using AppendCallFuncT = std::function<ze_result_t(CommandListCoreFamilyImmediate<gfxCoreFamily> *, T, K, size_t, ze_event_handle_t, uint64_t)>;
|
||||
using CsrContainer = StackVec<NEO::CommandStreamReceiver *, 12u>;
|
||||
|
||||
DeviceImp &device;
|
||||
@@ -81,15 +81,30 @@ struct BcsSplit {
|
||||
NEO::TransferDirection direction,
|
||||
size_t estimatedCmdBufferSize,
|
||||
AppendCallFuncT<gfxCoreFamily, T, K> appendCall) {
|
||||
constexpr size_t maxEventCountInPool = MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType);
|
||||
|
||||
const auto aggregatedEventsMode = this->events.aggregatedEventsMode;
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
ze_result_t result = ZE_RESULT_SUCCESS;
|
||||
auto &cmdListsForSplit = this->getCmdListsForSplit(direction);
|
||||
auto engineCount = cmdListsForSplit.size();
|
||||
size_t markerEventIndex = 0;
|
||||
uint64_t aggregatedEventIncrementVal = 1;
|
||||
|
||||
auto markerEventIndexRet = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
|
||||
if (!markerEventIndexRet.has_value()) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
const bool useSignalEventForSubcopy = aggregatedEventsMode && cmdList->isUsingAdditionalBlitProperties() && Event::isAggregatedEvent(signalEvent) &&
|
||||
(signalEvent->getInOrderIncrementValue() % engineCount == 0);
|
||||
|
||||
if (useSignalEventForSubcopy) {
|
||||
aggregatedEventIncrementVal = signalEvent->getInOrderIncrementValue() / engineCount;
|
||||
} else {
|
||||
auto markerEventIndexRet = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), maxEventCountInPool);
|
||||
if (!markerEventIndexRet.has_value()) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
markerEventIndex = *markerEventIndexRet;
|
||||
}
|
||||
|
||||
auto markerEventIndex = *markerEventIndexRet;
|
||||
|
||||
auto barrierRequired = !cmdList->isInOrderExecutionEnabled() && cmdList->isBarrierRequired();
|
||||
if (barrierRequired) {
|
||||
cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle(), false);
|
||||
@@ -98,18 +113,11 @@ struct BcsSplit {
|
||||
auto subcopyEventIndex = markerEventIndex * this->cmdLists.size();
|
||||
StackVec<ze_event_handle_t, 16> eventHandles;
|
||||
|
||||
auto &cmdListsForSplit = this->getCmdListsForSplit(direction);
|
||||
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
if (!cmdList->handleCounterBasedEventOperations(signalEvent, false)) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
const auto aggregatedEventsMode = this->events.aggregatedEventsMode;
|
||||
|
||||
auto totalSize = size;
|
||||
auto engineCount = cmdListsForSplit.size();
|
||||
for (size_t i = 0; i < cmdListsForSplit.size(); i++) {
|
||||
auto subCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(cmdListsForSplit[i]);
|
||||
|
||||
@@ -126,7 +134,7 @@ struct BcsSplit {
|
||||
}
|
||||
subCmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, hasRelaxedOrderingDependencies, false, false, false, false);
|
||||
|
||||
if (signalEvent && i == 0u) {
|
||||
if (!useSignalEventForSubcopy && signalEvent && i == 0u) {
|
||||
subCmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, nullptr, true, true, false, true);
|
||||
}
|
||||
|
||||
@@ -135,8 +143,8 @@ struct BcsSplit {
|
||||
auto localSrcPtr = ptrOffset(srcptr, size - totalSize);
|
||||
|
||||
auto copyEventIndex = aggregatedEventsMode ? markerEventIndex : subcopyEventIndex + i;
|
||||
auto eventHandle = this->events.subcopy[copyEventIndex]->toHandle();
|
||||
result = appendCall(subCmdList, localDstPtr, localSrcPtr, localSize, eventHandle, 1);
|
||||
auto eventHandle = useSignalEventForSubcopy ? signalEvent : this->events.subcopy[copyEventIndex]->toHandle();
|
||||
result = appendCall(subCmdList, localDstPtr, localSrcPtr, localSize, eventHandle, aggregatedEventIncrementVal);
|
||||
subCmdList->flushImmediate(result, true, !hasRelaxedOrderingDependencies, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, false, nullptr, true, nullptr, nullptr);
|
||||
|
||||
if ((aggregatedEventsMode && i == 0) || !aggregatedEventsMode) {
|
||||
@@ -157,7 +165,7 @@ struct BcsSplit {
|
||||
|
||||
const auto isCopyCmdList = cmdList->isCopyOnly(dualStreamCopyOffload);
|
||||
|
||||
if (signalEvent) {
|
||||
if (!useSignalEventForSubcopy && signalEvent) {
|
||||
cmdList->appendSignalEventPostWalker(signalEvent, nullptr, nullptr, !isCopyCmdList, false, isCopyCmdList);
|
||||
}
|
||||
|
||||
@@ -166,11 +174,11 @@ struct BcsSplit {
|
||||
}
|
||||
|
||||
if (cmdList->isInOrderExecutionEnabled()) {
|
||||
cmdList->appendSignalInOrderDependencyCounter(signalEvent, dualStreamCopyOffload, false, false);
|
||||
cmdList->appendSignalInOrderDependencyCounter(signalEvent, dualStreamCopyOffload, false, false, useSignalEventForSubcopy);
|
||||
}
|
||||
cmdList->handleInOrderDependencyCounter(signalEvent, false, dualStreamCopyOffload);
|
||||
|
||||
if (aggregatedEventsMode) {
|
||||
if (aggregatedEventsMode && !useSignalEventForSubcopy) {
|
||||
cmdList->assignInOrderExecInfoToEvent(this->events.marker[markerEventIndex]);
|
||||
}
|
||||
|
||||
|
||||
@@ -322,7 +322,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||
}
|
||||
appendSignalInOrderDependencyCounter(event, false, false, false);
|
||||
appendSignalInOrderDependencyCounter(event, false, false, false, false);
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
|
||||
Reference in New Issue
Block a user