refactor: unify event programming methods

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-10-07 16:20:37 +00:00
committed by Compute-Runtime-Automation
parent 6ddb550e05
commit f117f4c938
4 changed files with 70 additions and 71 deletions

View File

@@ -375,6 +375,8 @@ struct CommandListCoreFamily : public CommandListImp {
void appendCopyOperationFence(Event *signalEvent, NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyOffloadOperation);
bool isDeviceToHostCopyEventFenceRequired(Event *signalEvent) const;
bool isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyOffloadOperation) const;
// Tells appendEventForProfilingAllWalkers whether to take the single-packet event path.
// More than one definition exists in this change: one returns true unconditionally,
// another returns inputSinglePacketEventRequest unchanged.
bool singleEventPacketRequired(bool inputSinglePacketEventRequest) const;
// Emits a barrier with dcFlushEnable set and an immediate-data post-sync write of
// Event::STATE_SIGNALED for the given event, after bumping the event's packets-in-use count.
void programEventL3Flush(Event *event);
NEO::InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;

View File

@@ -4183,4 +4183,60 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCommandLists(uint32_t nu
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
// Programs event handling around a dispatch that may be made of several walkers.
//
// Copy operations, and requests for which singleEventPacketRequired() returns true, are
// forwarded to appendEventForProfiling() (before the walker) or appendSignalEventPostWalker()
// (after the walker). Otherwise a non-null event is handled here: before the walker its
// kernel/packet counters are reset; after the walker, an event reporting more than one kernel
// gets an optional L3 flush followed by the post-sync operation for its remaining packets.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation) {
    // Short-circuit is intentional: singleEventPacketRequired() is not consulted for copies.
    const bool useSinglePacketPath = copyOperation || singleEventPacketRequired(singlePacketEvent);

    if (useSinglePacketPath) {
        if (!beforeWalker) {
            appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency, copyOperation);
            return;
        }
        appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency, copyOperation);
        return;
    }

    // Multi-packet path: nothing to program without an event.
    if (!event) {
        return;
    }

    if (beforeWalker) {
        event->resetKernelCountAndPacketUsedCount();
        event->zeroKernelCount();
        return;
    }

    const bool multipleKernels = event->getKernelCount() > 1;
    if (!multipleKernels) {
        return;
    }

    if (getDcFlushRequired(event->isSignalScope())) {
        programEventL3Flush(event);
    }
    dispatchEventRemainingPacketsPostSyncOperation(event, copyOperation);
}
// Adds a DC-flushing barrier whose post-sync operation writes Event::STATE_SIGNALED as
// immediate data to the event packet(s) located right after the ones used by the last kernel.
// The event's packets-in-use count is increased accordingly and the event is marked as
// carrying an L3 flush for the current kernel.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(Event *event) {
    const bool multiPartition = partitionCount > 1;

    // Offset past the kernel's own packets: one packet, or one per partition.
    const auto singlePacketSize = event->getSinglePacketSize();
    const auto flushPacketOffset = multiPartition ? (partitionCount * singlePacketSize) : singlePacketSize;

    uint64_t postSyncAddress = event->getPacketAddress(device) + flushPacketOffset;
    if (event->isUsingContextEndOffset()) {
        postSyncAddress += event->getContextEndOffset();
    }

    // Account for the extra packet(s) written by the flush barrier.
    const auto flushPacketCount = multiPartition ? partitionCount : 1;
    event->setPacketsInUse(event->getPacketsUsedInLastKernel() + flushPacketCount);
    event->setL3FlushForCurrentKernel();

    auto &stream = *commandContainer.getCommandStream();

    NEO::PipeControlArgs barrierArgs;
    barrierArgs.dcFlushEnable = true;
    barrierArgs.workloadPartitionOffset = multiPartition;

    NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
        stream,
        NEO::PostSyncMode::immediateData,
        postSyncAddress,
        Event::STATE_SIGNALED,
        device->getNEODevice()->getRootDeviceEnvironment(),
        barrierArgs);
}
} // namespace L0

View File

@@ -110,6 +110,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
appendEventForProfiling(event, nullptr, true, false, false, false);
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
@@ -341,15 +342,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
return appendLaunchKernelWithParams(kernel, threadGroupDimensions, nullptr, launchParams);
}
// Variant without a multi-packet path: the event is always handled by the regular helpers,
// appendEventForProfiling() before the walker and appendSignalEventPostWalker() after it.
// singlePacketEvent is not consulted here.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation) {
    if (!beforeWalker) {
        appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency, copyOperation);
        return;
    }
    appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency, copyOperation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPreemptionMode(Kernel *kernel) {
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
@@ -360,4 +352,9 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
}
// This definition always reports that the single-packet event path is required;
// the caller's request is ignored.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::singleEventPacketRequired(bool inputSinglePacketEventRequest) const {
    constexpr bool alwaysSinglePacket = true;
    return alwaysSinglePacket;
}
} // namespace L0

View File

@@ -39,42 +39,6 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
return 4 * MemoryConstants::pageSize;
}
// Free-function form of the L3 flush programming: adds a DC-flushing barrier to the
// container's command stream whose post-sync operation writes Event::STATE_SIGNALED as
// immediate data to the event packet(s) following the ones used by the last kernel.
// Also increases the event's packets-in-use count and marks the L3 flush on the event.
template <GFXCORE_FAMILY gfxCoreFamily>
void programEventL3Flush(Event *event,
                         Device *device,
                         uint32_t partitionCount,
                         NEO::CommandContainer &commandContainer) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    const bool multiPartition = partitionCount > 1;

    // Offset past the kernel's own packets: one packet, or one per partition.
    const auto singlePacketSize = event->getSinglePacketSize();
    const auto flushPacketOffset = multiPartition ? (partitionCount * singlePacketSize) : singlePacketSize;

    uint64_t postSyncAddress = event->getPacketAddress(device) + flushPacketOffset;
    if (event->isUsingContextEndOffset()) {
        postSyncAddress += event->getContextEndOffset();
    }

    // Account for the extra packet(s) written by the flush barrier.
    const auto flushPacketCount = multiPartition ? partitionCount : 1;
    event->setPacketsInUse(event->getPacketsUsedInLastKernel() + flushPacketCount);
    event->setL3FlushForCurrentKernel();

    auto &stream = *commandContainer.getCommandStream();

    NEO::PipeControlArgs barrierArgs;
    barrierArgs.dcFlushEnable = true;
    barrierArgs.workloadPartitionOffset = multiPartition;

    NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
        stream,
        NEO::PostSyncMode::immediateData,
        postSyncAddress,
        Event::STATE_SIGNALED,
        commandContainer.getDevice()->getRootDeviceEnvironment(),
        barrierArgs);
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
if (event && compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) {
@@ -426,7 +390,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
} else if (event) {
event->setPacketsInUse(partitionCount);
if (l3FlushEnable) {
programEventL3Flush<gfxCoreFamily>(event, this->device, partitionCount, commandContainer);
programEventL3Flush(event);
}
if (!launchParams.isKernelSplitOperation) {
dispatchEventRemainingPacketsPostSyncOperation(event, false);
@@ -591,31 +555,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
return appendLaunchKernelWithParams(kernel, threadGroupDimensions, event, launchParams);
}
// Programs event handling around a dispatch that may be made of several walkers.
//
// Copy operations and single-packet requests are forwarded to appendEventForProfiling()
// (before the walker) or appendSignalEventPostWalker() (after the walker). Otherwise a
// non-null event is handled here: before the walker its kernel/packet counters are reset;
// after the walker, an event reporting more than one kernel gets an optional L3 flush
// followed by the post-sync operation for its remaining packets.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation) {
    const bool useSinglePacketPath = copyOperation || singlePacketEvent;

    if (useSinglePacketPath) {
        if (!beforeWalker) {
            appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency, copyOperation);
            return;
        }
        appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency, copyOperation);
        return;
    }

    // Multi-packet path: nothing to program without an event.
    if (!event) {
        return;
    }

    if (beforeWalker) {
        event->resetKernelCountAndPacketUsedCount();
        event->zeroKernelCount();
        return;
    }

    const bool multipleKernels = event->getKernelCount() > 1;
    if (!multipleKernels) {
        return;
    }

    if (getDcFlushRequired(event->isSignalScope())) {
        programEventL3Flush<gfxCoreFamily>(event, this->device, this->partitionCount, this->commandContainer);
    }
    dispatchEventRemainingPacketsPostSyncOperation(event, copyOperation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
@@ -625,4 +564,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
}
}
// This definition honours the caller's request as-is: the single-packet event path is
// required exactly when inputSinglePacketEventRequest is true.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::singleEventPacketRequired(bool inputSinglePacketEventRequest) const {
    const bool callerRequestedSinglePacket = inputSinglePacketEventRequest;
    return callerRequestedSinglePacket;
}
} // namespace L0