refactor: add skip event residency flag to compacted event
Related-To: NEO-10064
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
a5989eb31b
commit
6011eb147f
|
@ -182,7 +182,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||||
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
|
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
|
||||||
void appendMultiPartitionEpilogue() override;
|
void appendMultiPartitionEpilogue() override;
|
||||||
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent);
|
void appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency);
|
||||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency);
|
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency);
|
||||||
|
|
||||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||||
|
@ -289,7 +289,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||||
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
|
void adjustWriteKernelTimestamp(uint64_t globalAddress, uint64_t contextAddress, bool maskLsb, uint32_t mask, bool workloadPartition);
|
||||||
void appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling);
|
void appendEventForProfiling(Event *event, bool beforeWalker, bool skipBarrierForEndProfiling);
|
||||||
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
|
void appendEventForProfilingCopyCommand(Event *event, bool beforeWalker);
|
||||||
void appendSignalEventPostWalker(Event *event, bool skipBarrierForEndProfiling);
|
void appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency);
|
||||||
virtual void programStateBaseAddress(NEO::CommandContainer &container, bool useSbaProperties);
|
virtual void programStateBaseAddress(NEO::CommandContainer &container, bool useSbaProperties);
|
||||||
void appendComputeBarrierCommand();
|
void appendComputeBarrierCommand();
|
||||||
NEO::PipeControlArgs createBarrierFlags();
|
NEO::PipeControlArgs createBarrierFlags();
|
||||||
|
@ -320,9 +320,9 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||||
CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
|
CmdListEventOperation estimateEventPostSync(Event *event, uint32_t operations);
|
||||||
void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
void dispatchPostSyncCopy(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
||||||
void dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
void dispatchPostSyncCompute(uint64_t gpuAddress, uint32_t value, bool workloadPartition);
|
||||||
void dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming);
|
void dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, void **syncCmdBuffer, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming);
|
||||||
void dispatchEventRemainingPacketsPostSyncOperation(Event *event);
|
void dispatchEventRemainingPacketsPostSyncOperation(Event *event);
|
||||||
void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming);
|
void dispatchEventPostSyncOperation(Event *event, void **syncCmdBuffer, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming);
|
||||||
bool isKernelUncachedMocsRequired(bool kernelState) {
|
bool isKernelUncachedMocsRequired(bool kernelState) {
|
||||||
this->containsStatelessUncachedResource |= kernelState;
|
this->containsStatelessUncachedResource |= kernelState;
|
||||||
if (this->stateBaseAddressTracking) {
|
if (this->stateBaseAddressTracking) {
|
||||||
|
|
|
@ -465,7 +465,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||||
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), pDispatchArgumentsBuffer,
|
||||||
nullptr, launchParams);
|
nullptr, launchParams);
|
||||||
addToMappedEventList(event);
|
addToMappedEventList(event);
|
||||||
appendSignalEventPostWalker(event, false);
|
appendSignalEventPostWalker(event, nullptr, false, false);
|
||||||
|
|
||||||
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
handleInOrderDependencyCounter(event, isInOrderNonWalkerSignalingRequired(event));
|
||||||
|
|
||||||
|
@ -516,7 +516,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
addToMappedEventList(event);
|
addToMappedEventList(event);
|
||||||
appendSignalEventPostWalker(event, false);
|
appendSignalEventPostWalker(event, nullptr, false, false);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -554,7 +554,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
|
||||||
bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount);
|
bool useMaxPackets = event->isEventTimestampFlagSet() || (event->getPacketsInUse() < this->partitionCount);
|
||||||
|
|
||||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||||
dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false);
|
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_CLEARED, false, useMaxPackets, appendPipeControlWithPostSync, false);
|
||||||
|
|
||||||
if (!isCopyOnly()) {
|
if (!isCopyOnly()) {
|
||||||
if (this->partitionCount > 1) {
|
if (this->partitionCount > 1) {
|
||||||
|
@ -602,7 +602,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||||
|
|
||||||
appendEventForProfiling(signalEvent, true, false);
|
appendEventForProfiling(signalEvent, true, false);
|
||||||
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
applyMemoryRangesBarrier(numRanges, pRangeSizes, pRanges);
|
||||||
appendSignalEventPostWalker(signalEvent, false);
|
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||||
addToMappedEventList(signalEvent);
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
|
@ -1272,7 +1272,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||||
}
|
}
|
||||||
makeResidentDummyAllocation();
|
makeResidentDummyAllocation();
|
||||||
|
|
||||||
appendSignalEventPostWalker(signalEvent, false);
|
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1301,7 +1301,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
|
||||||
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa);
|
NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForImageRegion(blitProperties, *commandContainer.getCommandStream(), dummyBlitWa);
|
||||||
makeResidentDummyAllocation();
|
makeResidentDummyAllocation();
|
||||||
|
|
||||||
appendSignalEventPostWalker(signalEvent, false);
|
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1459,7 +1459,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||||
|
|
||||||
launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || dstAllocationStruct.needsFlush;
|
launchParams.pipeControlSignalling = (signalEvent && singlePipeControlPacket) || dstAllocationStruct.needsFlush;
|
||||||
|
|
||||||
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false);
|
||||||
|
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
|
||||||
|
@ -1524,7 +1524,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false);
|
||||||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||||
addToMappedEventList(signalEvent);
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
|
@ -1922,7 +1922,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||||
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
|
launchParams.isKernelSplitOperation = (fillArguments.leftRemainingBytes > 0 || fillArguments.rightRemainingBytes > 0);
|
||||||
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
bool singlePipeControlPacket = eventSignalPipeControl(launchParams.isKernelSplitOperation, dcFlush);
|
||||||
|
|
||||||
appendEventForProfilingAllWalkers(signalEvent, true, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, nullptr, true, singlePipeControlPacket, false);
|
||||||
|
|
||||||
if (fillArguments.leftRemainingBytes > 0) {
|
if (fillArguments.leftRemainingBytes > 0) {
|
||||||
launchParams.numKernelsInSplitLaunch++;
|
launchParams.numKernelsInSplitLaunch++;
|
||||||
|
@ -2067,7 +2067,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||||
}
|
}
|
||||||
|
|
||||||
addToMappedEventList(signalEvent);
|
addToMappedEventList(signalEvent);
|
||||||
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
|
appendEventForProfilingAllWalkers(signalEvent, nullptr, false, singlePipeControlPacket, false);
|
||||||
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
|
||||||
|
|
||||||
bool nonWalkerInOrderCmdChaining = false;
|
bool nonWalkerInOrderCmdChaining = false;
|
||||||
|
@ -2144,7 +2144,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||||
this->dummyBlitWa);
|
this->dummyBlitWa);
|
||||||
makeResidentDummyAllocation();
|
makeResidentDummyAllocation();
|
||||||
|
|
||||||
appendSignalEventPostWalker(signalEvent, false);
|
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
if (isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
|
@ -2155,7 +2155,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, bool skipBarrierForEndProfiling) {
|
void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *event, void **syncCmdBuffer, bool skipBarrierForEndProfiling, bool skipAddingEventToResidency) {
|
||||||
if (event == nullptr || !event->getPoolAllocation(this->device)) {
|
if (event == nullptr || !event->getPoolAllocation(this->device)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2163,10 +2163,12 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalEventPostWalker(Event *ev
|
||||||
appendEventForProfiling(event, false, skipBarrierForEndProfiling);
|
appendEventForProfiling(event, false, skipBarrierForEndProfiling);
|
||||||
} else {
|
} else {
|
||||||
event->resetKernelCountAndPacketUsedCount();
|
event->resetKernelCountAndPacketUsedCount();
|
||||||
|
if (!skipAddingEventToResidency) {
|
||||||
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
|
commandContainer.addToResidencyContainer(event->getPoolAllocation(this->device));
|
||||||
|
}
|
||||||
|
|
||||||
event->setPacketsInUse(this->partitionCount);
|
event->setPacketsInUse(this->partitionCount);
|
||||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, !isCopyOnly(), false);
|
dispatchEventPostSyncOperation(event, syncCmdBuffer, Event::STATE_SIGNALED, false, false, !isCopyOnly(), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2183,7 +2185,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
|
||||||
NEO::MiFlushArgs args{this->dummyBlitWa};
|
NEO::MiFlushArgs args{this->dummyBlitWa};
|
||||||
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), 0, 0, args);
|
NEO::EncodeMiFlushDW<GfxFamily>::programWithWa(*commandContainer.getCommandStream(), 0, 0, args);
|
||||||
makeResidentDummyAllocation();
|
makeResidentDummyAllocation();
|
||||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false, false);
|
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, false);
|
||||||
}
|
}
|
||||||
appendWriteKernelTimestamp(event, beforeWalker, false, false);
|
appendWriteKernelTimestamp(event, beforeWalker, false, false);
|
||||||
}
|
}
|
||||||
|
@ -2376,7 +2378,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||||
|
|
||||||
event->setPacketsInUse(this->partitionCount);
|
event->setPacketsInUse(this->partitionCount);
|
||||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false);
|
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false);
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(event);
|
appendSignalInOrderDependencyCounter(event);
|
||||||
|
@ -2643,7 +2645,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(Event *event,
|
||||||
bool workloadPartition = setupTimestampEventForMultiTile(event);
|
bool workloadPartition = setupTimestampEventForMultiTile(event);
|
||||||
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
appendWriteKernelTimestamp(event, beforeWalker, true, workloadPartition);
|
||||||
} else {
|
} else {
|
||||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, true, false, false, true);
|
dispatchEventPostSyncOperation(event, nullptr, Event::STATE_SIGNALED, true, false, false, true);
|
||||||
|
|
||||||
const auto &rootDeviceEnvironment = this->device->getNEODevice()->getRootDeviceEnvironment();
|
const auto &rootDeviceEnvironment = this->device->getNEODevice()->getRootDeviceEnvironment();
|
||||||
|
|
||||||
|
@ -2713,7 +2715,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||||
args);
|
args);
|
||||||
}
|
}
|
||||||
|
|
||||||
appendSignalEventPostWalker(signalEvent, false);
|
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
|
@ -3260,7 +3262,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
|
||||||
}
|
}
|
||||||
|
|
||||||
addToMappedEventList(signalEvent);
|
addToMappedEventList(signalEvent);
|
||||||
appendSignalEventPostWalker(signalEvent, this->isInOrderExecutionEnabled());
|
appendSignalEventPostWalker(signalEvent, nullptr, this->isInOrderExecutionEnabled(), false);
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
if (isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
|
@ -3425,7 +3427,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment);
|
NEO::MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(*commandContainer.getCommandStream(), gpuAddress, true, rootDeviceEnvironment);
|
||||||
}
|
}
|
||||||
|
|
||||||
appendSignalEventPostWalker(signalEvent, false);
|
appendSignalEventPostWalker(signalEvent, nullptr, false, false);
|
||||||
|
|
||||||
if (this->isInOrderExecutionEnabled()) {
|
if (this->isInOrderExecutionEnabled()) {
|
||||||
appendSignalInOrderDependencyCounter(signalEvent);
|
appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
|
@ -3553,7 +3555,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute(uint64_t gpuA
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming) {
|
void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdListEventOperation &eventOperations, uint64_t gpuAddress, void **syncCmdBuffer, uint32_t value, bool useLastPipeControl, bool signalScope, bool skipPartitionOffsetProgramming) {
|
||||||
decltype(&CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute) dispatchFunction = &CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute;
|
decltype(&CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute) dispatchFunction = &CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCompute;
|
||||||
if (isCopyOnly()) {
|
if (isCopyOnly()) {
|
||||||
dispatchFunction = &CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCopy;
|
dispatchFunction = &CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCopy;
|
||||||
|
@ -3603,7 +3605,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming) {
|
void CommandListCoreFamily<gfxCoreFamily>::dispatchEventPostSyncOperation(Event *event, void **syncCmdBuffer, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl, bool skipPartitionOffsetProgramming) {
|
||||||
uint32_t packets = event->getPacketsInUse();
|
uint32_t packets = event->getPacketsInUse();
|
||||||
if (this->signalAllEventPackets || useMax) {
|
if (this->signalAllEventPackets || useMax) {
|
||||||
packets = event->getMaxPacketsCount();
|
packets = event->getMaxPacketsCount();
|
||||||
|
@ -3616,7 +3618,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchEventPostSyncOperation(Event
|
||||||
eventPostSync.operationCount--;
|
eventPostSync.operationCount--;
|
||||||
}
|
}
|
||||||
|
|
||||||
dispatchPostSyncCommands(eventPostSync, gpuAddress, value, useLastPipeControl, event->isSignalScope(), skipPartitionOffsetProgramming);
|
dispatchPostSyncCommands(eventPostSync, gpuAddress, syncCmdBuffer, value, useLastPipeControl, event->isSignalScope(), skipPartitionOffsetProgramming);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
@ -3629,7 +3631,7 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchEventRemainingPacketsPostSync
|
||||||
eventAddress += event->getSinglePacketSize() * event->getPacketsInUse();
|
eventAddress += event->getSinglePacketSize() * event->getPacketsInUse();
|
||||||
|
|
||||||
constexpr bool appendLastPipeControl = false;
|
constexpr bool appendLastPipeControl = false;
|
||||||
dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, Event::STATE_SIGNALED, appendLastPipeControl, event->isSignalScope(), false);
|
dispatchPostSyncCommands(remainingPacketsOperation, eventAddress, nullptr, Event::STATE_SIGNALED, appendLastPipeControl, event->isSignalScope(), false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||||
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
*reinterpret_cast<typename GfxFamily::RENDER_SURFACE_STATE *>(surfaceStateSpace) = surfaceState;
|
||||||
}
|
}
|
||||||
|
|
||||||
appendSignalEventPostWalker(event, false);
|
appendSignalEventPostWalker(event, nullptr, false, false);
|
||||||
|
|
||||||
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
|
commandContainer.addToResidencyContainer(kernelImmutableData->getIsaGraphicsAllocation());
|
||||||
auto &residencyContainer = kernel->getResidencyContainer();
|
auto &residencyContainer = kernel->getResidencyContainer();
|
||||||
|
@ -329,11 +329,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dispatches on beforeWalker: when true it calls appendEventForProfiling(event, true, false);
// otherwise it calls appendSignalEventPostWalker for the event.
// On the updated side of this diff, syncCmdBuffer and skipAddingEventToResidency are
// forwarded unchanged to appendSignalEventPostWalker; singlePacketEvent is not read in
// this variant of the function.
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent) {
|
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
|
||||||
if (beforeWalker) {
|
if (beforeWalker) {
|
||||||
appendEventForProfiling(event, true, false);
|
appendEventForProfiling(event, true, false);
|
||||||
} else {
|
} else {
|
||||||
appendSignalEventPostWalker(event, false);
|
appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -286,7 +286,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||||
std::list<void *> additionalCommands;
|
std::list<void *> additionalCommands;
|
||||||
|
|
||||||
if (compactEvent) {
|
if (compactEvent) {
|
||||||
appendEventForProfilingAllWalkers(compactEvent, true, true);
|
appendEventForProfilingAllWalkers(compactEvent, nullptr, true, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling);
|
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation && !launchParams.pipeControlSignalling);
|
||||||
|
@ -297,7 +297,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||||
|
|
||||||
if (inOrderExecSignalRequired) {
|
if (inOrderExecSignalRequired) {
|
||||||
if (inOrderNonWalkerSignalling) {
|
if (inOrderNonWalkerSignalling) {
|
||||||
dispatchEventPostSyncOperation(eventForInOrderExec, Event::STATE_CLEARED, false, false, false, false);
|
dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, Event::STATE_CLEARED, false, false, false, false);
|
||||||
} else {
|
} else {
|
||||||
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
|
inOrderCounterValue = this->inOrderExecInfo->getCounterValue() + getInOrderIncrementValue();
|
||||||
inOrderExecInfo = this->inOrderExecInfo.get();
|
inOrderExecInfo = this->inOrderExecInfo.get();
|
||||||
|
@ -348,7 +348,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||||
}
|
}
|
||||||
|
|
||||||
if (compactEvent) {
|
if (compactEvent) {
|
||||||
appendEventForProfilingAllWalkers(compactEvent, false, true);
|
appendEventForProfilingAllWalkers(compactEvent, nullptr, false, true, launchParams.omitAddingEventResidency);
|
||||||
} else if (event) {
|
} else if (event) {
|
||||||
event->setPacketsInUse(partitionCount);
|
event->setPacketsInUse(partitionCount);
|
||||||
if (l3FlushEnable) {
|
if (l3FlushEnable) {
|
||||||
|
@ -506,12 +506,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent) {
|
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency) {
|
||||||
if (isCopyOnly() || singlePacketEvent) {
|
if (isCopyOnly() || singlePacketEvent) {
|
||||||
if (beforeWalker) {
|
if (beforeWalker) {
|
||||||
appendEventForProfiling(event, true, false);
|
appendEventForProfiling(event, true, false);
|
||||||
} else {
|
} else {
|
||||||
appendSignalEventPostWalker(event, false);
|
appendSignalEventPostWalker(event, syncCmdBuffer, false, skipAddingEventToResidency);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (event) {
|
if (event) {
|
||||||
|
|
|
@ -109,7 +109,7 @@ struct BcsSplit {
|
||||||
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true);
|
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true);
|
||||||
|
|
||||||
if (signalEvent && i == 0u) {
|
if (signalEvent && i == 0u) {
|
||||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, true, true);
|
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, true, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto localSize = totalSize / engineCount;
|
auto localSize = totalSize / engineCount;
|
||||||
|
@ -137,9 +137,9 @@ struct BcsSplit {
|
||||||
|
|
||||||
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true);
|
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true);
|
||||||
if (signalEvent) {
|
if (signalEvent) {
|
||||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, false, true);
|
cmdList->appendEventForProfilingAllWalkers(signalEvent, nullptr, false, true, false);
|
||||||
}
|
}
|
||||||
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true);
|
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], nullptr, false, true, false);
|
||||||
|
|
||||||
if (cmdList->isInOrderExecutionEnabled()) {
|
if (cmdList->isInOrderExecutionEnabled()) {
|
||||||
cmdList->appendSignalInOrderDependencyCounter(signalEvent);
|
cmdList->appendSignalInOrderDependencyCounter(signalEvent);
|
||||||
|
|
|
@ -2945,7 +2945,7 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
|
||||||
|
|
||||||
auto usedBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
auto usedBefore = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||||
|
|
||||||
commandList->appendSignalEventPostWalker(nullptr, false);
|
commandList->appendSignalEventPostWalker(nullptr, nullptr, false, false);
|
||||||
|
|
||||||
EXPECT_EQ(commandList->getCmdContainer().getCommandStream()->getUsed(), usedBefore);
|
EXPECT_EQ(commandList->getCmdContainer().getCommandStream()->getUsed(), usedBefore);
|
||||||
}
|
}
|
||||||
|
|
|
@ -412,7 +412,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||||
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||||
|
|
||||||
commandList->partitionCount = packets;
|
commandList->partitionCount = packets;
|
||||||
commandList->appendSignalEventPostWalker(event.get(), false);
|
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||||
EXPECT_EQ(packets, event->getPacketsInUse());
|
EXPECT_EQ(packets, event->getPacketsInUse());
|
||||||
|
|
||||||
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
||||||
|
@ -472,7 +472,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||||
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||||
|
|
||||||
commandList->partitionCount = packets;
|
commandList->partitionCount = packets;
|
||||||
commandList->appendSignalEventPostWalker(event.get(), false);
|
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||||
EXPECT_EQ(packets, event->getPacketsInUse());
|
EXPECT_EQ(packets, event->getPacketsInUse());
|
||||||
|
|
||||||
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
||||||
|
@ -645,7 +645,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||||
|
|
||||||
event->setEventTimestampFlag(false);
|
event->setEventTimestampFlag(false);
|
||||||
|
|
||||||
commandList->appendSignalEventPostWalker(event.get(), false);
|
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||||
size_t usedAfterSize = cmdStream->getUsed();
|
size_t usedAfterSize = cmdStream->getUsed();
|
||||||
|
|
||||||
GenCmdList cmdList;
|
GenCmdList cmdList;
|
||||||
|
|
|
@ -950,7 +950,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture {
|
||||||
|
|
||||||
commandList->setupTimestampEventForMultiTile(event.get());
|
commandList->setupTimestampEventForMultiTile(event.get());
|
||||||
size_t sizeBefore = cmdStream->getUsed();
|
size_t sizeBefore = cmdStream->getUsed();
|
||||||
commandList->appendSignalEventPostWalker(event.get(), false);
|
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||||
size_t sizeAfter = cmdStream->getUsed();
|
size_t sizeAfter = cmdStream->getUsed();
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
|
||||||
|
@ -2397,5 +2397,39 @@ HWTEST2_F(CommandListCreate, givenPlatformSupportsHdcUntypedCacheFlushWhenAppend
|
||||||
EXPECT_TRUE(timestampPostSyncFound);
|
EXPECT_TRUE(timestampPostSyncFound);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the last argument (skipAddingEventToResidency) of appendEventForProfilingAllWalkers:
// with it set to true the event's pool allocation is expected to be absent from the command
// container's residency container after the call; with it set to false the allocation is
// expected to be present.
HWTEST2_F(CommandListCreate, givenAppendSignalEventWhenSkipAddToResidencyTrueThenEventAllocationNotAddedToResidency, IsAtLeastXeHpCore) {
|
||||||
|
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||||
|
auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||||
|
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
|
||||||
|
// Event pool holding a single event, with no pool flags set.
ze_event_pool_desc_t eventPoolDesc = {};
|
||||||
|
eventPoolDesc.count = 1;
|
||||||
|
eventPoolDesc.flags = 0;
|
||||||
|
|
||||||
|
ze_event_desc_t eventDesc = {};
|
||||||
|
eventDesc.index = 0;
|
||||||
|
eventDesc.signal = 0;
|
||||||
|
|
||||||
|
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||||
|
ASSERT_NE(nullptr, event.get());
|
||||||
|
|
||||||
|
// Reference to the container that is searched for the event allocation after each call.
auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer();
|
||||||
|
auto eventAllocation = event->getPoolAllocation(device);
|
||||||
|
|
||||||
|
// First call: syncCmdBuffer=nullptr, beforeWalker=false, singlePacketEvent=true, skip flag set.
bool skipAdd = true;
|
||||||
|
commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, false, true, skipAdd);
|
||||||
|
|
||||||
|
auto eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
|
||||||
|
EXPECT_EQ(residencyContainer.end(), eventAllocIt);
|
||||||
|
|
||||||
|
// Second call: same arguments with the skip flag cleared; the allocation must now be found.
skipAdd = false;
|
||||||
|
|
||||||
|
commandList->appendEventForProfilingAllWalkers(event.get(), nullptr, false, true, skipAdd);
|
||||||
|
eventAllocIt = std::find(residencyContainer.begin(), residencyContainer.end(), eventAllocation);
|
||||||
|
EXPECT_NE(residencyContainer.end(), eventAllocIt);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -686,7 +686,7 @@ HWTEST2_F(CommandListEventFenceTestsXeHpcCore, givenCommandListWithRegularEventA
|
||||||
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
|
||||||
commandList->appendSignalEventPostWalker(event.get(), false);
|
commandList->appendSignalEventPostWalker(event.get(), nullptr, false, false);
|
||||||
|
|
||||||
GenCmdList cmdList;
|
GenCmdList cmdList;
|
||||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||||
|
|
Loading…
Reference in New Issue