feature: new heuristic to enable relaxed ordering

Related-To: GSD-10308

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-12-06 16:02:20 +00:00
committed by Compute-Runtime-Automation
parent 75139d2322
commit 526f9c5e81
11 changed files with 214 additions and 6 deletions

View File

@@ -354,7 +354,7 @@ struct CommandListCoreFamily : public CommandListImp {
}
void postInitComputeSetup();
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
// Base-class default: relaxed ordering dispatch is never allowed; both arguments are ignored and false is returned.
// NOTE(review): this diff view shows two variants of the same declaration that differ only in the trailing
// `const` — presumably the const-qualified line is the removed one and the non-const line its replacement
// (the immediate-list implementation in this change writes to member state); confirm against the raw diff.
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const { return false; }
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) { return false; }
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
bool canSkipInOrderEventWait(Event &event, bool ignorCbEventBoundToCmdList) const;
bool handleInOrderImplicitDependencies(bool relaxedOrderingAllowed, bool copyOffloadOperation);

View File

@@ -208,7 +208,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
TransferType getTransferType(const CpuMemCopyInfo &cpuMemCopyInfo);
size_t getTransferThreshold(TransferType transferType);
bool isBarrierRequired();
// Immediate-command-list override of the relaxed ordering decision.
// NOTE(review): two variants are listed (with and without `const`); presumably the const-qualified one is the
// removed line of the diff. The qualifier has to match the base-class virtual for `override` to compile — confirm.
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const override;
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) override;
bool skipInOrderNonWalkerSignalingAllowed(ze_event_handle_t signalEvent) const override;
protected:
@@ -221,12 +221,15 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
void handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
// Returns cmdQImmediateCopyOffload when copyOffloadOperation is true, otherwise cmdQImmediate.
CommandQueue *getCmdQImmediate(bool copyOffloadOperation) const;
MOCKABLE_VIRTUAL void checkAssert();
ComputeFlushMethodType computeFlushMethod = nullptr;
// Used by the counter heuristic in isRelaxedOrderingDispatchAllowed: incremented while the queue task count
// equals the CSR task count, reset to 0 otherwise.
uint64_t relaxedOrderingCounter = 0;
std::atomic<bool> dependenciesPresent{false};
bool latestFlushIsHostVisible = false;
// Written in flushImmediate with the copyOffloadSubmission value of that flush.
bool latestFlushIsCopyOffload = false;
// Set to true by the counter heuristic when the queue and CSR task counts differ; cleared once
// relaxedOrderingCounter exceeds the threshold.
bool keepRelaxedOrderingEnabled = false;
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -398,7 +398,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
// Forwards to executeCommandListImmediateWithFlushTaskImpl, passing the immediate queue that matches
// copyOffloadSubmission (the copy-offload queue when true, the regular immediate queue otherwise).
// NOTE(review): two return statements are listed because this is a diff view without +/- markers; presumably the
// first (inline ternary) is the removed line and the second (getCmdQImmediate) its replacement — confirm.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, bool requireTaskCountUpdate) {
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, requireTaskCountUpdate, copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate);
return executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, kernelOperation, requireTaskCountUpdate, getCmdQImmediate(copyOffloadSubmission));
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -546,7 +546,9 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderNonWalkerSignal
bool nonWalkerSignalingHasRelaxedOrdering = false;
if (NEO::debugManager.flags.EnableInOrderRelaxedOrderingForEventsChaining.get() != 0) {
auto counterValueBeforeSecondCheck = this->relaxedOrderingCounter;
nonWalkerSignalingHasRelaxedOrdering = isRelaxedOrderingDispatchAllowed(1, false);
this->relaxedOrderingCounter = counterValueBeforeSecondCheck; // dont increment twice
}
if (nonWalkerSignalingHasRelaxedOrdering) {
@@ -1064,13 +1066,18 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
return hostSynchronize(timeout, true);
}
// Selects the immediate command queue for a submission: the copy-offload queue when
// copyOffloadOperation is true, the regular immediate queue otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
CommandQueue *CommandListCoreFamilyImmediate<gfxCoreFamily>::getCmdQImmediate(bool copyOffloadOperation) const {
    if (copyOffloadOperation) {
        return this->cmdQImmediateCopyOffload;
    }
    return this->cmdQImmediate;
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, bool kernelOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent,
bool requireTaskCountUpdate) {
auto signalEvent = Event::fromHandle(hSignalEvent);
auto queue = copyOffloadSubmission ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
auto queue = getCmdQImmediate(copyOffloadSubmission);
this->latestFlushIsCopyOffload = copyOffloadSubmission;
if (NEO::debugManager.flags.DeferStateInitSubmissionToFirstRegularUsage.get() == 1) {
@@ -1431,10 +1438,46 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
}
// Decides whether the next dispatch on this immediate command list may use relaxed ordering.
//   numWaitEvents - number of wait events; one more is counted when hasInOrderDependencies() is true.
//   copyOffload   - selects which CSR (getCsr) and which immediate queue (getCmdQImmediate) are consulted.
// Returns false immediately when the CSR reports that direct-submission relaxed ordering is not enabled.
// NOTE(review): this listing is a diff view without +/- markers. The const-qualified signature and the return
// that calls getCsr(copyOffload) a second time look like the removed lines; confirm against the raw diff.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) const {
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents, bool copyOffload) {
auto csr = getCsr(copyOffload);
if (!csr->directSubmissionRelaxedOrderingEnabled()) {
return false;
}
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*getCsr(copyOffload), numEvents);
// Counter-based heuristic; taken only when the debug flag is set to 1.
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristic.get() == 1) {
// Default threshold is the CSR's relaxed ordering queue depth; a debug flag may override it further down.
uint32_t relaxedOrderingCounterThreshold = csr->getDirectSubmissionRelaxedOrderingQueueDepth();
auto queueTaskCount = getCmdQImmediate(copyOffload)->getTaskCount();
auto csrTaskCount = csr->peekTaskCount();
// When the CSR task count is 1 and either init-with-first-submission is supported for this CSR type or
// heapless state init is enabled, the queue count is expected to still be 0 (checked via DEBUG_BREAK_IF)
// and is treated as 1 — presumably so the initial submission does not look like another queue's work; confirm.
if ((this->device->getNEODevice()->isInitDeviceWithFirstSubmissionSupported(csr->getType()) || this->heaplessStateInitEnabled) && csr->peekTaskCount() == 1) {
DEBUG_BREAK_IF(queueTaskCount != 0);
queueTaskCount = 1;
}
// Any flag value other than -1 replaces the queue-depth threshold.
if (NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get() != -1) {
relaxedOrderingCounterThreshold = static_cast<uint32_t>(NEO::debugManager.flags.DirectSubmissionRelaxedOrderingCounterHeuristicTreshold.get());
}
if (queueTaskCount == csrTaskCount) {
// Queue and CSR task counts match: count one more consecutive check.
relaxedOrderingCounter++;
} else {
// Submission from another queue. Reset counter and keep relaxed ordering allowed
relaxedOrderingCounter = 0;
this->keepRelaxedOrderingEnabled = true;
}
// Counter went past the threshold: clear the keep flag and deny relaxed ordering.
if (relaxedOrderingCounter > static_cast<uint64_t>(relaxedOrderingCounterThreshold)) {
this->keepRelaxedOrderingEnabled = false;
return false;
}
// Allowed only while the keep flag is set and there is at least one event to wait on.
return (keepRelaxedOrderingEnabled && (numEvents > 0));
}
// Heuristic not enabled: defer to the shared helper using the CSR and the computed event count.
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*csr, numEvents);
}
template <GFXCORE_FAMILY gfxCoreFamily>