mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 17:13:29 +08:00
refactor: unify event programming methods
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6ddb550e05
commit
f117f4c938
@@ -375,6 +375,8 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void appendCopyOperationFence(Event *signalEvent, NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyOffloadOperation);
|
||||
bool isDeviceToHostCopyEventFenceRequired(Event *signalEvent) const;
|
||||
bool isDeviceToHostBcsCopy(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation, bool copyOffloadOperation) const;
|
||||
bool singleEventPacketRequired(bool inputSinglePacketEventRequest) const;
|
||||
void programEventL3Flush(Event *event);
|
||||
|
||||
NEO::InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;
|
||||
|
||||
|
||||
@@ -4183,4 +4183,60 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCommandLists(uint32_t nu
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Programs the event-related commands that surround a walker (or copy) dispatch.
//
// Two paths are taken:
//  * copyOperation is set, or singleEventPacketRequired(singlePacketEvent) returns true:
//    the call is forwarded to appendEventForProfiling when beforeWalker is true, otherwise to
//    appendSignalEventPostWalker. The event pointer is passed through without a null check here.
//  * otherwise, and only when event is non-null:
//      - beforeWalker: resetKernelCountAndPacketUsedCount() and zeroKernelCount() are called
//        on the event;
//      - after the walker: when event->getKernelCount() > 1, programEventL3Flush(event) is
//        issued if getDcFlushRequired(event->isSignalScope()) is true, and then
//        dispatchEventRemainingPacketsPostSyncOperation(event, copyOperation) is called.
//
// syncCmdBuffer and outTimeStampSyncCmds are only used on the first path, where they are handed
// to the forwarded call.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation) {
|
||||
if (copyOperation || singleEventPacketRequired(singlePacketEvent)) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency, copyOperation);
|
||||
} else {
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency, copyOperation);
|
||||
}
|
||||
} else {
|
||||
// Nothing is programmed on this path when no event was supplied.
if (event) {
|
||||
if (beforeWalker) {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
event->zeroKernelCount();
|
||||
} else {
|
||||
// Post-walker work is only needed when the event reports more than one kernel.
if (event->getKernelCount() > 1) {
|
||||
if (getDcFlushRequired(event->isSignalScope())) {
|
||||
programEventL3Flush(event);
|
||||
}
|
||||
dispatchEventRemainingPacketsPostSyncOperation(event, copyOperation);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adds a barrier with dcFlushEnable set to the command list stream, with an immediate-data
// post-sync write of Event::STATE_SIGNALED targeting the given event, and updates the event's
// packet bookkeeping accordingly.
//
// The write address is event->getPacketAddress(device) advanced by one single-packet size, or by
// partitionCount single-packet sizes when partitionCount > 1, plus the context-end offset when
// event->isUsingContextEndOffset() is true.
//
// event is dereferenced unconditionally, so callers must pass a non-null pointer.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(Event *event) {
|
||||
auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
|
||||
: event->getSinglePacketSize();
|
||||
uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
eventAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
// Grow the in-use packet count relative to getPacketsUsedInLastKernel(): by partitionCount
// when partitioned, otherwise by one.
if (partitionCount > 1) {
|
||||
event->setPacketsInUse(event->getPacketsUsedInLastKernel() + partitionCount);
|
||||
} else {
|
||||
event->setPacketsInUse(event->getPacketsUsedInLastKernel() + 1);
|
||||
}
|
||||
|
||||
event->setL3FlushForCurrentKernel();
|
||||
|
||||
auto &cmdListStream = *commandContainer.getCommandStream();
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
// workloadPartitionOffset is enabled only when more than one partition is in use.
args.workloadPartitionOffset = partitionCount > 1;
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
cmdListStream,
|
||||
NEO::PostSyncMode::immediateData,
|
||||
eventAddress,
|
||||
Event::STATE_SIGNALED,
|
||||
device->getNEODevice()->getRootDeviceEnvironment(),
|
||||
args);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -110,6 +110,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, nullptr, true, false, false, false);
|
||||
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
|
||||
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
|
||||
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
|
||||
@@ -341,15 +342,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||
return appendLaunchKernelWithParams(kernel, threadGroupDimensions, nullptr, launchParams);
|
||||
}
|
||||
|
||||
// Forwarding-only implementation: when beforeWalker is true the call goes to
// appendEventForProfiling, otherwise to appendSignalEventPostWalker.
// singlePacketEvent is not read in this body, and syncCmdBuffer is used only by the post-walker
// call.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency, copyOperation);
|
||||
} else {
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency, copyOperation);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline NEO::PreemptionMode CommandListCoreFamily<gfxCoreFamily>::obtainKernelPreemptionMode(Kernel *kernel) {
|
||||
NEO::PreemptionFlags flags = NEO::PreemptionHelper::createPreemptionLevelFlags(*device->getNEODevice(), &kernel->getImmutableData()->getDescriptor());
|
||||
@@ -360,4 +352,9 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
}
|
||||
|
||||
// Always reports that a single event packet is required; inputSinglePacketEventRequest is
// ignored by this implementation.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::singleEventPacketRequired(bool inputSinglePacketEventRequest) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -39,42 +39,6 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
|
||||
return 4 * MemoryConstants::pageSize;
|
||||
}
|
||||
|
||||
// Free-function helper that adds a barrier with dcFlushEnable set to commandContainer's command
// stream, with an immediate-data post-sync write of Event::STATE_SIGNALED targeting the given
// event, and updates the event's packet bookkeeping.
//
// Parameters:
//   event            - event to update and signal; dereferenced unconditionally, must be non-null.
//   device           - passed to event->getPacketAddress() to obtain the base packet address.
//   partitionCount   - when > 1, scales the address offset and packet count and enables
//                      workloadPartitionOffset on the barrier.
//   commandContainer - supplies the command stream and, through getDevice(), the root device
//                      environment handed to the barrier helper.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void programEventL3Flush(Event *event,
|
||||
Device *device,
|
||||
uint32_t partitionCount,
|
||||
NEO::CommandContainer &commandContainer) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
// Offset is one single-packet size, or partitionCount of them when partitioned.
auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
|
||||
: event->getSinglePacketSize();
|
||||
uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
|
||||
if (event->isUsingContextEndOffset()) {
|
||||
eventAddress += event->getContextEndOffset();
|
||||
}
|
||||
|
||||
// Grow the in-use packet count relative to getPacketsUsedInLastKernel(): by partitionCount
// when partitioned, otherwise by one.
if (partitionCount > 1) {
|
||||
event->setPacketsInUse(event->getPacketsUsedInLastKernel() + partitionCount);
|
||||
} else {
|
||||
event->setPacketsInUse(event->getPacketsUsedInLastKernel() + 1);
|
||||
}
|
||||
|
||||
event->setL3FlushForCurrentKernel();
|
||||
|
||||
auto &cmdListStream = *commandContainer.getCommandStream();
|
||||
NEO::PipeControlArgs args;
|
||||
args.dcFlushEnable = true;
|
||||
args.workloadPartitionOffset = partitionCount > 1;
|
||||
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
|
||||
cmdListStream,
|
||||
NEO::PostSyncMode::immediateData,
|
||||
eventAddress,
|
||||
Event::STATE_SIGNALED,
|
||||
commandContainer.getDevice()->getRootDeviceEnvironment(),
|
||||
args);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
|
||||
if (event && compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) {
|
||||
@@ -426,7 +390,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
} else if (event) {
|
||||
event->setPacketsInUse(partitionCount);
|
||||
if (l3FlushEnable) {
|
||||
programEventL3Flush<gfxCoreFamily>(event, this->device, partitionCount, commandContainer);
|
||||
programEventL3Flush(event);
|
||||
}
|
||||
if (!launchParams.isKernelSplitOperation) {
|
||||
dispatchEventRemainingPacketsPostSyncOperation(event, false);
|
||||
@@ -591,31 +555,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(Kernel
|
||||
return appendLaunchKernelWithParams(kernel, threadGroupDimensions, event, launchParams);
|
||||
}
|
||||
|
||||
// Programs the event-related commands that surround a walker (or copy) dispatch.
//
// Two paths are taken:
//  * copyOperation or singlePacketEvent is set: the call is forwarded to
//    appendEventForProfiling when beforeWalker is true, otherwise to
//    appendSignalEventPostWalker. The event pointer is passed through without a null check here.
//  * otherwise, and only when event is non-null:
//      - beforeWalker: resetKernelCountAndPacketUsedCount() and zeroKernelCount() are called
//        on the event;
//      - after the walker: when event->getKernelCount() > 1, the free-function template
//        programEventL3Flush<gfxCoreFamily> is invoked if
//        getDcFlushRequired(event->isSignalScope()) is true, and then
//        dispatchEventRemainingPacketsPostSyncOperation(event, copyOperation) is called.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(Event *event, void **syncCmdBuffer, CommandToPatchContainer *outTimeStampSyncCmds, bool beforeWalker, bool singlePacketEvent, bool skipAddingEventToResidency, bool copyOperation) {
|
||||
if (copyOperation || singlePacketEvent) {
|
||||
if (beforeWalker) {
|
||||
appendEventForProfiling(event, outTimeStampSyncCmds, true, false, skipAddingEventToResidency, copyOperation);
|
||||
} else {
|
||||
appendSignalEventPostWalker(event, syncCmdBuffer, outTimeStampSyncCmds, false, skipAddingEventToResidency, copyOperation);
|
||||
}
|
||||
} else {
|
||||
// Nothing is programmed on this path when no event was supplied.
if (event) {
|
||||
if (beforeWalker) {
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
event->zeroKernelCount();
|
||||
} else {
|
||||
if (event->getKernelCount() > 1) {
|
||||
if (getDcFlushRequired(event->isSignalScope())) {
|
||||
programEventL3Flush<gfxCoreFamily>(event, this->device, this->partitionCount, this->commandContainer);
|
||||
}
|
||||
dispatchEventRemainingPacketsPostSyncOperation(event, copyOperation);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds) {
|
||||
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
|
||||
@@ -625,4 +564,9 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the caller's request unchanged: a single event packet is required exactly when
// inputSinglePacketEventRequest is true.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::singleEventPacketRequired(bool inputSinglePacketEventRequest) const {
|
||||
return inputSinglePacketEventRequest;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user